diff --git a/graphengine b/graphengine index 1350673d51b..45ca7863ac6 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit 1350673d51b3f8535bc217a7780e6a0b52ff9a41 +Subproject commit 45ca7863ac6410c8e2f83168481ddc6b43bcea33 diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc index 1314e2c09ec..f4e718f024a 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -56,13 +56,16 @@ #include "dataset/kernels/image/pad_op.h" #include "dataset/kernels/image/random_color_adjust_op.h" #include "dataset/kernels/image/random_crop_and_resize_op.h" +#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" #include "dataset/kernels/image/random_crop_decode_resize_op.h" #include "dataset/kernels/image/random_crop_op.h" +#include "dataset/kernels/image/random_crop_with_bbox_op.h" #include "dataset/kernels/image/random_horizontal_flip_bbox_op.h" #include "dataset/kernels/image/random_horizontal_flip_op.h" #include "dataset/kernels/image/random_resize_op.h" #include "dataset/kernels/image/random_rotation_op.h" #include "dataset/kernels/image/random_vertical_flip_op.h" +#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h" #include "dataset/kernels/image/rescale_op.h" #include "dataset/kernels/image/resize_bilinear_op.h" #include "dataset/kernels/image/resize_op.h" @@ -381,6 +384,12 @@ void bindTensorOps2(py::module *m) { *m, "RandomVerticalFlipOp", "Tensor operation to randomly flip an image vertically.") .def(py::init(), py::arg("probability") = RandomVerticalFlipOp::kDefProbability); + (void)py::class_>( + *m, "RandomVerticalFlipWithBBoxOp", + "Tensor operation to randomly flip an image vertically" + " and adjust bounding boxes.") + .def(py::init(), py::arg("probability") = RandomVerticalFlipWithBBoxOp::kDefProbability); + (void)py::class_>(*m, "RandomCropOp", "Gives random crop of specified size " "Takes crop size") 
@@ -392,6 +401,20 @@ void bindTensorOps2(py::module *m) { py::arg("fillG") = RandomCropOp::kDefFillG, py::arg("fillB") = RandomCropOp::kDefFillB); (void)py::class_>(*m, "ChannelSwapOp").def(py::init<>()); + (void)py::class_>(*m, "RandomCropWithBBoxOp", + "Gives random crop of given " + "size + adjusts bboxes " + "Takes crop size") + .def(py::init(), + py::arg("cropHeight"), py::arg("cropWidth"), py::arg("padTop") = RandomCropWithBBoxOp::kDefPadTop, + py::arg("padBottom") = RandomCropWithBBoxOp::kDefPadBottom, + py::arg("padLeft") = RandomCropWithBBoxOp::kDefPadLeft, + py::arg("padRight") = RandomCropWithBBoxOp::kDefPadRight, + py::arg("borderType") = RandomCropWithBBoxOp::kDefBorderType, + py::arg("padIfNeeded") = RandomCropWithBBoxOp::kDefPadIfNeeded, + py::arg("fillR") = RandomCropWithBBoxOp::kDefFillR, py::arg("fillG") = RandomCropWithBBoxOp::kDefFillG, + py::arg("fillB") = RandomCropWithBBoxOp::kDefFillB); + (void)py::class_>( *m, "OneHotOp", "Tensor operation to apply one hot encoding. Takes number of classes.") .def(py::init()); @@ -488,6 +511,20 @@ void bindTensorOps3(py::module *m) { py::arg("interpolation") = RandomCropAndResizeOp::kDefInterpolation, py::arg("maxIter") = RandomCropAndResizeOp::kDefMaxIter); + (void)py::class_>( + *m, "RandomCropAndResizeWithBBoxOp", + "Tensor operation to randomly crop an image (with BBoxes) and resize to a given size." 
+ " Takes output height and width and" + " optional parameters for lower and upper bound for aspect ratio (h/w) and scale," + " interpolation mode, and max attempts to crop") + .def(py::init(), py::arg("targetHeight"), + py::arg("targetWidth"), py::arg("scaleLb") = RandomCropAndResizeWithBBoxOp::kDefScaleLb, + py::arg("scaleUb") = RandomCropAndResizeWithBBoxOp::kDefScaleUb, + py::arg("aspectLb") = RandomCropAndResizeWithBBoxOp::kDefAspectLb, + py::arg("aspectUb") = RandomCropAndResizeWithBBoxOp::kDefAspectUb, + py::arg("interpolation") = RandomCropAndResizeWithBBoxOp::kDefInterpolation, + py::arg("maxIter") = RandomCropAndResizeWithBBoxOp::kDefMaxIter); + (void)py::class_>( *m, "RandomColorAdjustOp", "Tensor operation to adjust an image's color randomly." diff --git a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt index 05705e64dd8..557b496a957 100644 --- a/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/kernels/image/CMakeLists.txt @@ -10,14 +10,17 @@ add_library(kernels-image OBJECT pad_op.cc random_color_adjust_op.cc random_crop_decode_resize_op.cc + random_crop_and_resize_with_bbox_op.cc random_crop_and_resize_op.cc random_crop_op.cc + random_crop_with_bbox_op.cc random_horizontal_flip_op.cc random_horizontal_flip_bbox_op.cc bounding_box_augment_op.cc random_resize_op.cc random_rotation_op.cc random_vertical_flip_op.cc + random_vertical_flip_with_bbox_op.cc rescale_op.cc resize_bilinear_op.cc resize_op.cc diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc index 7c31a94b52f..db4cc0ec89c 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc @@ -16,6 +16,7 @@ #include "dataset/kernels/image/image_utils.h" #include #include +#include #include #include #include @@ -724,5 +725,101 @@ Status Pad(const std::shared_ptr &input, 
std::shared_ptr *output RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); } } +// -------- BBOX OPERATIONS -------- // +void UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, int *CB_Xmin, int *CB_Ymin, int *CB_Xmax, + int *CB_Ymax) { + // Clips every (x, y, width, height) box in *bboxList to the crop box and drops boxes that lie outside it + // CB_* are the X/Y Min/Max of the cropped image region - normally obtained from 'GetCropBox' functions + uint32_t bb_Xmin_t, bb_Ymin_t, bb_Xmax_t, bb_Ymax_t; + + std::vector correctInd; + std::vector copyVals; + dsize_t bboxDim = (*bboxList)->shape()[1]; + for (int i = 0; i < *bboxCount; i++) { + bool retFlag = false; // reset for every box - set when this box misses the crop region + int bb_Xmin, bb_Xmax, bb_Ymin, bb_Ymax; + (*bboxList)->GetUnsignedIntAt(&bb_Xmin_t, {i, 0}); + (*bboxList)->GetUnsignedIntAt(&bb_Ymin_t, {i, 1}); + (*bboxList)->GetUnsignedIntAt(&bb_Xmax_t, {i, 2}); + (*bboxList)->GetUnsignedIntAt(&bb_Ymax_t, {i, 3}); + bb_Xmin = bb_Xmin_t; + bb_Ymin = bb_Ymin_t; + bb_Xmax = bb_Xmax_t; + bb_Ymax = bb_Ymax_t; + bb_Xmax = bb_Xmin + bb_Xmax; + bb_Ymax = bb_Ymin + bb_Ymax; + // check for image / BB overlap + if (((bb_Xmin > *CB_Xmax) || (bb_Ymin > *CB_Ymax)) || ((bb_Xmax < *CB_Xmin) || (bb_Ymax < *CB_Ymin))) { + retFlag = true; // no overlap found + } + if (retFlag) { // bbox no longer within the cropped region - leave it out of the output + continue; + } + // Update this bbox and select it to move to the final output tensor + correctInd.push_back(i); + // adjust BBox corners by bringing into new CropBox if beyond + // Also resetting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin + bb_Xmin = bb_Xmin - (std::min(0, (bb_Xmin - *CB_Xmin)) + *CB_Xmin); + bb_Xmax = bb_Xmax - (std::max(0, (bb_Xmax - *CB_Xmax)) + *CB_Xmin); + bb_Ymin = bb_Ymin - (std::min(0, (bb_Ymin - *CB_Ymin)) + *CB_Ymin); + bb_Ymax = bb_Ymax - (std::max(0, (bb_Ymax - *CB_Ymax)) + *CB_Ymin); + // reset min values and calculate width/height from Box corners + (*bboxList)->SetItemAt({i, 0}, (uint32_t)(bb_Xmin)); + 
(*bboxList)->SetItemAt({i, 1}, (uint32_t)(bb_Ymin)); + (*bboxList)->SetItemAt({i, 2}, (uint32_t)(bb_Xmax - bb_Xmin)); + (*bboxList)->SetItemAt({i, 3}, (uint32_t)(bb_Ymax - bb_Ymin)); + } + // create new tensor and copy over bboxes still valid to the image + // bboxes outside of new cropped region are ignored - empty tensor returned in case of none + *bboxCount = correctInd.size(); + uint32_t temp; + for (auto slice : correctInd) { // for every index in the loop + for (int ix = 0; ix < bboxDim; ix++) { + (*bboxList)->GetUnsignedIntAt(&temp, {slice, ix}); + copyVals.push_back(temp); + } + } + std::shared_ptr retV; + Tensor::CreateTensor(&retV, copyVals, TensorShape({static_cast<dsize_t>(*bboxCount), bboxDim})); + (*bboxList) = retV; // reset pointer +} + +void PadBBoxes(std::shared_ptr *bboxList, size_t *bboxCount, int32_t *pad_top, int32_t *pad_left) { + uint32_t xMin = 0; + uint32_t yMin = 0; + for (int i = 0; i < *bboxCount; i++) { + (*bboxList)->GetUnsignedIntAt(&xMin, {i, 0}); + (*bboxList)->GetUnsignedIntAt(&yMin, {i, 1}); + xMin = xMin + (uint32_t)(*pad_left); // should not be negative + yMin = yMin + (uint32_t)(*pad_top); + (*bboxList)->SetItemAt({i, 0}, xMin); + (*bboxList)->SetItemAt({i, 1}, yMin); + } +} + +void UpdateBBoxesForResize(std::shared_ptr *bboxList, size_t *bboxCount, int32_t *target_width_, + int32_t *target_height_, int *orig_width, int *orig_height) { + uint32_t bb_Xmin, bb_Ymin, bb_Xwidth, bb_Ywidth; + // cast to double to preserve the fractional part of the scale factors + double W_aspRatio = (*target_width_ * 1.0) / (*orig_width * 1.0); + double H_aspRatio = (*target_height_ * 1.0) / (*orig_height * 1.0); + for (int i = 0; i < *bboxCount; i++) { + // for each bounding box + (*bboxList)->GetUnsignedIntAt(&bb_Xmin, {i, 0}); + (*bboxList)->GetUnsignedIntAt(&bb_Ymin, {i, 1}); + (*bboxList)->GetUnsignedIntAt(&bb_Xwidth, {i, 2}); + (*bboxList)->GetUnsignedIntAt(&bb_Ywidth, {i, 3}); + // update positions and widths + bb_Xmin = bb_Xmin * W_aspRatio; + bb_Ymin = bb_Ymin * H_aspRatio; + 
bb_Xwidth = bb_Xwidth * W_aspRatio; + bb_Ywidth = bb_Ywidth * H_aspRatio; + // reset bounding box values + (*bboxList)->SetItemAt({i, 0}, (uint32_t)bb_Xmin); + (*bboxList)->SetItemAt({i, 1}, (uint32_t)bb_Ymin); + (*bboxList)->SetItemAt({i, 2}, (uint32_t)bb_Xwidth); + (*bboxList)->SetItemAt({i, 3}, (uint32_t)bb_Ywidth); + } +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/dataset/kernels/image/image_utils.h index b44f953f97b..cf977139b53 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.h @@ -225,7 +225,39 @@ Status Erase(const std::shared_ptr &input, std::shared_ptr *outp Status Pad(const std::shared_ptr &input, std::shared_ptr *output, const int32_t &pad_top, const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0); + +// -------- BBOX OPERATIONS -------- // +// Updates and checks bounding boxes for new cropped region of image +// @param bboxList: A tensor containing bounding box tensors +// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop +// @param CB_Xmin: Image's CropBox Xmin coordinate +// @param CB_Ymin: Image's CropBox Ymin coordinate +// @param CB_Xmax: Image's CropBox Xmax coordinate - (Xmin + width) +// @param CB_Ymax: Image's CropBox Ymax coordinate - (Ymin + height) +void UpdateBBoxesForCrop(std::shared_ptr *bboxList, size_t *bboxCount, int *CB_Xmin, int *CB_Ymin, int *CB_Xmax, + int *CB_Ymax); + +// Updates bounding boxes with required Top and Left padding +// Top and Left padding amounts required to adjust bboxes' min X,Y values according to padding 'push' +// Top/Left since the image's 0,0 coordinate is taken from top left +// @param bboxList: A tensor containing bounding box tensors +// @param bboxCount: total Number of bounding boxes - 
required within caller function to run update loop +// @param pad_top: Total amount of padding applied to image top +// @param pad_left: Total amount of padding applied to image left side +void PadBBoxes(std::shared_ptr *bboxList, size_t *bboxCount, int32_t *pad_top, int32_t *pad_left); + +// Updates bounding boxes for an Image Resize Operation - Takes in set of valid BBoxes +// e.g. those that remain after a crop +// @param bboxList: A tensor containing bounding box tensors +// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop +// @param bboxList: A tensor containing bounding box tensors +// @param target_width_: required width of image post resize +// @param target_height_: required height of image post resize +// @param orig_width: current width of image pre resize +// @param orig_height: current height of image pre resize +void UpdateBBoxesForResize(std::shared_ptr *bboxList, size_t *bboxCount, int32_t *target_width_, + int32_t *target_height_, int *orig_width, int *orig_height); + } // namespace dataset } // namespace mindspore - #endif // DATASET_KERNELS_IMAGE_IMAGE_UTILS_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc new file mode 100644 index 00000000000..94607f46074 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "dataset/util/random.h" +#include "dataset/util/status.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h" + +namespace mindspore { +namespace dataset { + +Status RandomCropAndResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Size() >= 2, "The shape of input is abnormal"); + + (*output).push_back(nullptr); // slot 0: image, filled by CropAndResize below + (*output).push_back(nullptr); + (*output)[1] = std::move(input[1]); // input is const, so this presumably copies the handle - input[1] is still read below + + size_t bboxCount = input[1]->shape()[0]; // number of rows in bbox tensor + int h_in = input[0]->shape()[0]; + int w_in = input[0]->shape()[1]; + int x = 0; + int y = 0; + int crop_height = 0; + int crop_width = 0; + + (void)RandomCropAndResizeOp::GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width); + + int maxX = x + crop_width; // max dims of selected CropBox on image + int maxY = y + crop_height; + + UpdateBBoxesForCrop(&(*output)[1], &bboxCount, &x, &y, &maxX, &maxY); // helper declared in image_utils.h + RETURN_IF_NOT_OK(CropAndResize(input[0], &(*output)[0], x, y, crop_height, crop_width, target_height_, target_width_, + interpolation_)); + + UpdateBBoxesForResize(&(*output)[1], &bboxCount, &target_width_, &target_height_, &crop_width, &crop_height); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h new file mode 100644 index 00000000000..9675d43933b --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_with_bbox_op.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies 
Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ +#define DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ + +#include "dataset/kernels/image/random_crop_and_resize_op.h" + +namespace mindspore { +namespace dataset { + +class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp { + public: + // Constructor for RandomCropAndResizeWithBBoxOp, with default value and passing to base class constructor + RandomCropAndResizeWithBBoxOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb, + float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, + float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation, + int32_t max_iter = kDefMaxIter) + : RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation, + max_iter) {} + + ~RandomCropAndResizeWithBBoxOp() override = default; + + void Print(std::ostream &out) const override { + out << "RandomCropAndResizeWithBBox: " << RandomCropAndResizeOp::target_height_ << " " + << RandomCropAndResizeOp::target_width_; + } + + Status Compute(const TensorRow &input, TensorRow *output) override; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_RANDOM_CROP_AND_RESIZE_WITH_BBOX_OP_H_ diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc 
b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc index 7662c64cc40..256d4d88dc2 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.cc @@ -48,44 +48,81 @@ RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_ rnd_.seed(GetSeed()); } +Status RandomCropOp::ImagePadding(const std::shared_ptr &input, std::shared_ptr *pad_image, + int32_t *t_pad_top, int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, + int32_t *padded_image_w, int32_t *padded_image_h, bool *crop_further) { + *t_pad_top = pad_top_; + *t_pad_bottom = pad_bottom_; + *t_pad_left = pad_left_; + *t_pad_right = pad_right_; + + RETURN_IF_NOT_OK( + Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_)); + CHECK_FAIL_RETURN_UNEXPECTED((*pad_image)->shape().Size() >= 2, "Abnormal shape"); + + *padded_image_h = (*pad_image)->shape()[0]; + *padded_image_w = (*pad_image)->shape()[1]; + + if (*padded_image_h == crop_height_ && *padded_image_w == crop_width_) { + *crop_further = false; // no need for further crop + return Status::OK(); + } else if (pad_if_needed_) { + // check the dimensions of the image for padding, if we do need padding, then we change the pad values + if (*padded_image_h < crop_height_) { + RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, crop_height_ - *padded_image_h, crop_height_ - *padded_image_h, 0, 0, + border_type_, fill_r_, fill_g_, fill_b_)); + + // update pad total above/below - write through the pointers so the caller sees the new totals + *t_pad_top += (crop_height_ - *padded_image_h); + *t_pad_bottom += (crop_height_ - *padded_image_h); + } + if (*padded_image_w < crop_width_) { + RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, 0, 0, crop_width_ - *padded_image_w, crop_width_ - *padded_image_w, + border_type_, fill_r_, fill_g_, fill_b_)); + // update pad total left/right - write through the pointers so the caller sees the new totals + *t_pad_left += (crop_width_ - *padded_image_w); + *t_pad_right += (crop_width_ - *padded_image_w); + } + 
*padded_image_h = (*pad_image)->shape()[0]; + *padded_image_w = (*pad_image)->shape()[1]; + } + + if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { + return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__, + "Crop size is greater than the image dimensions or is zero."); + } + return Status::OK(); +} + +void RandomCropOp::GenRandomXY(int *x, int *y, int32_t *padded_image_w, int32_t *padded_image_h) { + // picks the random corner of the crop so that the crop fits inside the padded image + *x = std::uniform_int_distribution(0, *padded_image_w - crop_width_)(rnd_); + *y = std::uniform_int_distribution(0, *padded_image_h - crop_height_)(rnd_); +} + Status RandomCropOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); // Apply padding first then crop std::shared_ptr pad_image; + int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right; + int32_t padded_image_w; + int32_t padded_image_h; + bool crop_further = true; // whether image needs further cropping based on new size & requirements - RETURN_IF_NOT_OK( - Pad(input, &pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_)); - CHECK_FAIL_RETURN_UNEXPECTED(pad_image->shape().Size() >= 2, "Abnormal shape"); - int32_t padded_image_h = pad_image->shape()[0]; - int32_t padded_image_w = pad_image->shape()[1]; - // no need to crop if same size - if (padded_image_h == crop_height_ && padded_image_w == crop_width_) { + RETURN_IF_NOT_OK( // error code sent back directly + ImagePadding(input, &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, &padded_image_w, + &padded_image_h, &crop_further)); + if (!crop_further) { *output = pad_image; return Status::OK(); } - if (pad_if_needed_) { - // check the dimensions of the image for padding, if we do need padding, then we change the pad values - if (padded_image_h < crop_height_) { - RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, crop_height_ - padded_image_h, crop_height_ - 
padded_image_h, 0, 0, - border_type_, fill_r_, fill_g_, fill_b_)); - } - if (padded_image_w < crop_width_) { - RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, 0, 0, crop_width_ - padded_image_w, crop_width_ - padded_image_w, - border_type_, fill_r_, fill_g_, fill_b_)); - } - padded_image_h = pad_image->shape()[0]; - padded_image_w = pad_image->shape()[1]; - } - if (padded_image_h < crop_height_ || padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { - return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__, - "Crop size is greater than the image dimensions or is zero."); - } - // random top corner - int x = std::uniform_int_distribution(0, padded_image_w - crop_width_)(rnd_); - int y = std::uniform_int_distribution(0, padded_image_h - crop_height_)(rnd_); + + int x, y; + GenRandomXY(&x, &y, &padded_image_w, &padded_image_h); return Crop(pad_image, output, x, y, crop_width_, crop_height_); } + Status RandomCropOp::OutputShape(const std::vector &inputs, std::vector &outputs) { RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); outputs.clear(); diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h index d4ec49cd7b4..3e0984245bc 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_op.h @@ -50,11 +50,20 @@ class RandomCropOp : public TensorOp { void Print(std::ostream &out) const override { out << "RandomCropOp: " << crop_height_ << " " << crop_width_; } Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + Status ImagePadding(const std::shared_ptr &input, std::shared_ptr *pad_image, int32_t *t_pad_top, + int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, int32_t *padded_image_w, + int32_t *padded_image_h, bool *crop_further); + + void GenRandomXY(int *x, int *y, int32_t *padded_image_w, int32_t *padded_image_h); + Status OutputShape(const std::vector 
&inputs, std::vector &outputs) override; - private: + protected: int32_t crop_height_ = 0; int32_t crop_width_ = 0; + + private: int32_t pad_top_ = 0; int32_t pad_bottom_ = 0; int32_t pad_left_ = 0; diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc new file mode 100644 index 00000000000..10ab94092e3 --- /dev/null +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.cc @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "dataset/kernels/image/random_crop_with_bbox_op.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/util/random.h" +#include "dataset/util/status.h" + +namespace mindspore { +namespace dataset { +Status RandomCropWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { + IO_CHECK_VECTOR(input, output); + BOUNDING_BOX_CHECK(input); + + std::shared_ptr pad_image; + int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right; + size_t boxCount = input[1]->shape()[0]; // number of rows + + int32_t padded_image_h; + int32_t padded_image_w; + + (*output).push_back(nullptr); + (*output).push_back(nullptr); + (*output)[1] = std::move(input[1]); // since some boxes may be removed + + bool crop_further = true; // Whether further cropping will be required or not, true unless required size matches + RETURN_IF_NOT_OK( // Error passed back to caller + RandomCropOp::ImagePadding(input[0], &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, + &padded_image_w, &padded_image_h, &crop_further)); + + // only top/left padding moves the box origins - PadBBoxes takes pad_top first, then pad_left + if (t_pad_left || t_pad_top) { + PadBBoxes(&(*output)[1], &boxCount, &t_pad_top, &t_pad_left); + } + if (!crop_further) { + // no further cropping required + (*output)[0] = pad_image; + (*output)[1] = std::move(input[1]); + return Status::OK(); + } + + int x, y; + RandomCropOp::GenRandomXY(&x, &y, &padded_image_w, &padded_image_h); + int maxX = x + RandomCropOp::crop_width_; // max dims of selected CropBox on image + int maxY = y + RandomCropOp::crop_height_; + UpdateBBoxesForCrop(&(*output)[1], &boxCount, &x, &y, &maxX, &maxY); + return Crop(pad_image, &(*output)[0], x, y, RandomCropOp::crop_width_, RandomCropOp::crop_height_); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h new 
file mode 100644
index 00000000000..88a58d35574
--- /dev/null
+++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_with_bbox_op.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_
+#define DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_
+
+#include
+#include
+
+#include "dataset/kernels/image/random_crop_op.h"
+
+namespace mindspore {
+namespace dataset {
+// RandomCropOp subclass that overrides Print and Compute; construction is delegated entirely to RandomCropOp.
+class RandomCropWithBBoxOp : public RandomCropOp {
+ public:
+  // Accepts the crop size, the four paddings, the border type, the pad-if-needed flag and the RGB fill values,
+  // each with its kDef* default, and hands every one of them to the RandomCropOp constructor unchanged.
+  RandomCropWithBBoxOp(int32_t crop_height, int32_t crop_width, int32_t pad_top = kDefPadTop,
+                       int32_t pad_bottom = kDefPadBottom, int32_t pad_left = kDefPadLeft,
+                       int32_t pad_right = kDefPadRight, BorderType border_types = kDefBorderType,
+                       bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG,
+                       uint8_t fill_b = kDefFillB)
+      : RandomCropOp(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, border_types, pad_if_needed,
+                     fill_r, fill_g, fill_b) {}
+
+  ~RandomCropWithBBoxOp() override = default;
+
+  // Writes the op name followed by the crop height and crop width held by the base class.
+  void Print(std::ostream &out) const override {
+    out << "RandomCropWithBBoxOp: " << crop_height_ << " " << crop_width_;
+  }
+
+  // Row-level entry point; declared here, defined out of line.
+  Status Compute(const TensorRow &input, TensorRow *output) override;
+};
+}  // namespace dataset
+}  // namespace mindspore
+
+#endif  // DATASET_KERNELS_IMAGE_RANDOM_CROP_WITH_BBOX_OP_H_
diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc
new file mode 100644
index 00000000000..2f11db96c51
--- /dev/null
+++ b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.cc
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "dataset/util/status.h"
+#include "dataset/kernels/image/image_utils.h"
+#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h"
+
+namespace mindspore {
+namespace dataset {
+const float RandomVerticalFlipWithBBoxOp::kDefProbability = 0.5;
+Status RandomVerticalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) {
+  IO_CHECK_VECTOR(input, output);
+  BOUNDING_BOX_CHECK(input);
+  // On a successful Bernoulli draw flip the image (input[0]) and its boxes (input[1]); otherwise copy input through.
+  if (distribution_(rnd_)) {
+    const dsize_t imHeight = input[0]->shape()[0];
+    const dsize_t boxCount = input[1]->shape()[0];  // number of rows in tensor
+
+    // every row is read as unsigned values: column 1 is the box's min y, column 3 its height
+    // (type defined based on VOC test dataset)
+    for (dsize_t i = 0; i < boxCount; i++) {
+      uint32_t boxCorner_y = 0;
+      uint32_t boxHeight = 0;
+      RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxCorner_y, {i, 1}));  // get min y of bbox
+      RETURN_IF_NOT_OK(input[1]->GetUnsignedIntAt(&boxHeight, {i, 3}));    // get height of bbox
+
+      // rows y .. y+h-1 land on (H-1)-row after the flip, so the new top row is H - (y + h); (H - 1) - (y + h)
+      // would be one row short and would wrap below zero for a box touching the bottom edge (assumes y + h <= H)
+      const uint32_t newBoxCorner_y = static_cast<uint32_t>(imHeight - (boxCorner_y + boxHeight));
+      RETURN_IF_NOT_OK(input[1]->SetItemAt({i, 1}, newBoxCorner_y));
+    }
+
+    (*output).push_back(nullptr);
+    (*output).push_back(nullptr);
+    (*output)[1] = input[1];  // input is const, so this copies the handle; the box rows were rewritten above
+
+    return VerticalFlip(input[0], &(*output)[0]);
+  }
+  *output = input;
+  return Status::OK();
+}
+}  // namespace dataset
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h
new file mode 100644
index 00000000000..4764cc2b752
--- /dev/null
+++ b/mindspore/ccsrc/dataset/kernels/image/random_vertical_flip_with_bbox_op.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_
+#define DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_
+
+#include
+#include
+
+#include "dataset/core/tensor.h"
+#include "dataset/kernels/tensor_op.h"
+#include "dataset/util/status.h"
+#include "dataset/util/random.h"
+
+namespace mindspore {
+namespace dataset {
+class RandomVerticalFlipWithBBoxOp : public TensorOp {
+ public:
+  // Default flip probability (declared here, defined out of class); also used by python_bindings.cc
+  static const float kDefProbability;
+  // Seeds the random engine with GetSeed() and configures the Bernoulli draw.
+  // @param probability: Probability of Image flipping, kDefProbability by default
+  explicit RandomVerticalFlipWithBBoxOp(float probability = kDefProbability)
+      : rnd_(GetSeed()), distribution_(probability) {}
+
+  ~RandomVerticalFlipWithBBoxOp() override = default;
+
+  // Writes the op name to the given stream.
+  void Print(std::ostream &out) const override { out << "RandomVerticalFlipWithBBoxOp"; }
+
+  Status Compute(const TensorRow &input, TensorRow *output) override;
+
+ private:
+  std::mt19937 rnd_;                          // random engine, seeded once at construction
+  std::bernoulli_distribution distribution_;  // built from the constructor's probability argument
+};
+}  // namespace dataset
+}  // namespace mindspore
+
+#endif  // DATASET_KERNELS_IMAGE_RANDOM_VERTICAL_FLIP_WITH_BBOX_OP_H_
diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py
index 3d4c6da2aeb..a7b00f0abd3 100644
--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@@ -149,6 +149,54 @@ class RandomCrop(cde.RandomCropOp):
         super().__init__(*size, *padding, border_type,
pad_if_needed, *fill_value)
 
 
+class RandomCropWithBBox(cde.RandomCropWithBBoxOp):
+    """
+    Crop the input image at a random location, and adjust the bounding boxes accordingly.
+
+    Args:
+        size (int or sequence): The output size of the cropped image.
+            If size is an int, a square crop of size (size, size) is returned.
+            If size is a sequence of length 2, it should be (height, width).
+        padding (int or sequence, optional): The number of pixels to pad the image (default=None).
+            If padding is not None, the image is first padded with the padding values.
+            If a single number is provided, it pads all borders with this value.
+            If a tuple or list of 2 values is provided, it pads the (left and top)
+            with the first value and (right and bottom) with the second value.
+            If 4 values are provided as a list or tuple, it pads the left, top, right and bottom respectively.
+        pad_if_needed (bool, optional): Pad the image if either side is smaller than
+            the given output size (default=False).
+        fill_value (int or tuple, optional): The pixel intensity of the borders if
+            the padding_mode is Border.CONSTANT (default=0). If it is a 3-tuple, it is used to
+            fill R, G, B channels respectively.
+        padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of
+            [Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].
+
+            - Border.CONSTANT, means it fills the border with constant values.
+
+            - Border.EDGE, means it pads with the last value on the edge.
+
+            - Border.REFLECT, means it reflects the values on the edge omitting the last
+              value of edge.
+
+            - Border.SYMMETRIC, means it reflects the values on the edge repeating the last
+              value of edge.
+    """
+
+    @check_random_crop
+    def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
+        self.size = size
+        self.padding = padding
+        self.pad_if_needed = pad_if_needed
+        self.fill_value = fill_value
+        self.padding_mode = padding_mode.value
+        if padding is None:
+            padding = (0, 0, 0, 0)
+        if isinstance(fill_value, int):  # temporary fix: repeat a single intensity for the three fill arguments
+            fill_value = tuple([fill_value] * 3)
+        border_type = DE_C_BORDER_TYPE[padding_mode]
+        super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value)
+
+
 class RandomHorizontalFlip(cde.RandomHorizontalFlipOp):
     """
     Flip the input image horizontally, randomly with a given probability.
@@ -192,6 +240,20 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp):
         super().__init__(prob)
 
 
+class RandomVerticalFlipWithBBox(cde.RandomVerticalFlipWithBBoxOp):
+    """
+    Flip the input image vertically and adjust the bounding boxes, randomly with a given probability.
+
+    Args:
+        prob (float): Probability of the image being flipped (default=0.5).
+    """
+
+    @check_prob
+    def __init__(self, prob=0.5):
+        self.prob = prob
+        super().__init__(prob)
+
+
 class BoundingBoxAug(cde.BoundingBoxAugOp):
     """
     Flip the input image vertically, randomly with a given probability.
@@ -237,6 +299,42 @@ class Resize(cde.ResizeOp):
         super().__init__(*size, interpoltn)
 
 
+class RandomResizedCropWithBBox(cde.RandomCropAndResizeWithBBoxOp):
+    """
+    Crop the input image to a random size and aspect ratio, and adjust the bounding boxes accordingly.
+
+    Args:
+        size (int or sequence): The size of the output image.
+            If size is an int, a square crop of size (size, size) is returned.
+            If size is a sequence of length 2, it should be (height, width).
+        scale (tuple, optional): Range (min, max) of respective size of the original
+            size to be cropped (default=(0.08, 1.0)).
+        ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped
+            (default=(3. / 4., 4. / 3.)).
+        interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
+            It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
+
+            - Inter.BILINEAR, means interpolation method is bilinear interpolation.
+
+            - Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
+
+            - Inter.BICUBIC, means interpolation method is bicubic interpolation.
+
+        max_attempts (int, optional): The maximum number of attempts to propose a valid
+            crop_area (default=10). If exceeded, fall back to use center_crop instead.
+    """
+    @check_random_resize_crop
+    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
+                 interpolation=Inter.BILINEAR, max_attempts=10):
+        self.size = size
+        self.scale = scale
+        self.ratio = ratio
+        self.interpolation = interpolation
+        self.max_attempts = max_attempts
+        interpoltn = DE_C_INTER_MODE[interpolation]
+        super().__init__(*size, *scale, *ratio, interpoltn, max_attempts)
+
+
 class RandomResizedCrop(cde.RandomCropAndResizeOp):
     """
     Crop the input image to a random size and aspect ratio.