my files added back in

another commit





This commit is contained in:
Danish Farid 2020-06-18 22:43:42 -04:00
parent 1424178601
commit c1e6ece563
14 changed files with 672 additions and 30 deletions

@ -1 +1 @@
Subproject commit 1350673d51b3f8535bc217a7780e6a0b52ff9a41
Subproject commit 45ca7863ac6410c8e2f83168481ddc6b43bcea33

View File

@ -56,13 +56,16 @@
#include "dataset/kernels/image/pad_op.h"
#include "dataset/kernels/image/random_color_adjust_op.h"
#include "dataset/kernels/image/random_crop_and_resize_op.h"
#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h"
#include "dataset/kernels/image/random_crop_decode_resize_op.h"
#include "dataset/kernels/image/random_crop_op.h"
#include "dataset/kernels/image/random_crop_with_bbox_op.h"
#include "dataset/kernels/image/random_horizontal_flip_bbox_op.h"
#include "dataset/kernels/image/random_horizontal_flip_op.h"
#include "dataset/kernels/image/random_resize_op.h"
#include "dataset/kernels/image/random_rotation_op.h"
#include "dataset/kernels/image/random_vertical_flip_op.h"
#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h"
#include "dataset/kernels/image/rescale_op.h"
#include "dataset/kernels/image/resize_bilinear_op.h"
#include "dataset/kernels/image/resize_op.h"
@ -381,6 +384,12 @@ void bindTensorOps2(py::module *m) {
*m, "RandomVerticalFlipOp", "Tensor operation to randomly flip an image vertically.")
.def(py::init<float>(), py::arg("probability") = RandomVerticalFlipOp::kDefProbability);
(void)py::class_<RandomVerticalFlipWithBBoxOp, TensorOp, std::shared_ptr<RandomVerticalFlipWithBBoxOp>>(
*m, "RandomVerticalFlipWithBBoxOp",
"Tensor operation to randomly flip an image vertically"
" and adjust bounding boxes.")
.def(py::init<float>(), py::arg("probability") = RandomVerticalFlipWithBBoxOp::kDefProbability);
(void)py::class_<RandomCropOp, TensorOp, std::shared_ptr<RandomCropOp>>(*m, "RandomCropOp",
"Gives random crop of specified size "
"Takes crop size")
@ -392,6 +401,20 @@ void bindTensorOps2(py::module *m) {
py::arg("fillG") = RandomCropOp::kDefFillG, py::arg("fillB") = RandomCropOp::kDefFillB);
(void)py::class_<HwcToChwOp, TensorOp, std::shared_ptr<HwcToChwOp>>(*m, "ChannelSwapOp").def(py::init<>());
(void)py::class_<RandomCropWithBBoxOp, TensorOp, std::shared_ptr<RandomCropWithBBoxOp>>(*m, "RandomCropWithBBoxOp",
"Gives random crop of given "
"size + adjusts bboxes "
"Takes crop size")
.def(py::init<int32_t, int32_t, int32_t, int32_t, int32_t, int32_t, BorderType, bool, uint8_t, uint8_t, uint8_t>(),
py::arg("cropHeight"), py::arg("cropWidth"), py::arg("padTop") = RandomCropWithBBoxOp::kDefPadTop,
py::arg("padBottom") = RandomCropWithBBoxOp::kDefPadBottom,
py::arg("padLeft") = RandomCropWithBBoxOp::kDefPadLeft,
py::arg("padRight") = RandomCropWithBBoxOp::kDefPadRight,
py::arg("borderType") = RandomCropWithBBoxOp::kDefBorderType,
py::arg("padIfNeeded") = RandomCropWithBBoxOp::kDefPadIfNeeded,
py::arg("fillR") = RandomCropWithBBoxOp::kDefFillR, py::arg("fillG") = RandomCropWithBBoxOp::kDefFillG,
py::arg("fillB") = RandomCropWithBBoxOp::kDefFillB);
(void)py::class_<OneHotOp, TensorOp, std::shared_ptr<OneHotOp>>(
*m, "OneHotOp", "Tensor operation to apply one hot encoding. Takes number of classes.")
@ -488,6 +511,20 @@ void bindTensorOps3(py::module *m) {
py::arg("interpolation") = RandomCropAndResizeOp::kDefInterpolation,
py::arg("maxIter") = RandomCropAndResizeOp::kDefMaxIter);
(void)py::class_<RandomCropAndResizeWithBBoxOp, TensorOp, std::shared_ptr<RandomCropAndResizeWithBBoxOp>>(
*m, "RandomCropAndResizeWithBBoxOp",
"Tensor operation to randomly crop an image (with BBoxes) and resize to a given size."
"Takes output height and width and"
"optional parameters for lower and upper bound for aspect ratio (h/w) and scale,"
"interpolation mode, and max attempts to crop")
.def(py::init<int32_t, int32_t, float, float, float, float, InterpolationMode, int32_t>(), py::arg("targetHeight"),
py::arg("targetWidth"), py::arg("scaleLb") = RandomCropAndResizeWithBBoxOp::kDefScaleLb,
py::arg("scaleUb") = RandomCropAndResizeWithBBoxOp::kDefScaleUb,
py::arg("aspectLb") = RandomCropAndResizeWithBBoxOp::kDefAspectLb,
py::arg("aspectUb") = RandomCropAndResizeWithBBoxOp::kDefAspectUb,
py::arg("interpolation") = RandomCropAndResizeWithBBoxOp::kDefInterpolation,
py::arg("maxIter") = RandomCropAndResizeWithBBoxOp::kDefMaxIter);
(void)py::class_<RandomColorAdjustOp, TensorOp, std::shared_ptr<RandomColorAdjustOp>>(
*m, "RandomColorAdjustOp",
"Tensor operation to adjust an image's color randomly."

View File

@ -10,14 +10,17 @@ add_library(kernels-image OBJECT

View File

@ -16,6 +16,7 @@
#include "dataset/kernels/image/image_utils.h"
#include <opencv2/imgproc/types_c.h>
#include <algorithm>
#include <vector>
#include <stdexcept>
#include <utility>
#include <opencv2/imgcodecs.hpp>
@ -724,5 +725,101 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
RETURN_STATUS_UNEXPECTED("Unexpected error in pad");
// -------- BBOX OPERATIONS -------- //
// Rewrites the bounding boxes in *bboxList for a crop region spanning (*CB_Xmin, *CB_Ymin) to
// (*CB_Xmax, *CB_Ymax).
// Each row is read as (x-min, y-min, width, height): columns 2 and 3 are added to the min corner below to
// obtain the max corner. Kept boxes are clipped to the crop region and re-expressed relative to its origin.
// On exit *bboxCount is set to correctInd.size() and *bboxList is pointed at a newly created tensor.
void UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int *CB_Xmin, int *CB_Ymin, int *CB_Xmax,
int *CB_Ymax) {
// Also pass X/Y Min/Max of image cropped region - normally obtained from 'GetCropBox' functions
uint32_t bb_Xmin_t, bb_Ymin_t, bb_Xmax_t, bb_Ymax_t;
std::vector<int> correctInd;
std::vector<uint32_t> copyVals;
dsize_t bboxDim = (*bboxList)->shape()[1];
// NOTE(review): this flag is declared outside the loop and no line shown here resets it to false, so once one
// box misses the crop region it stays set for every later box - confirm that is intended.
bool retFlag = false; // set to true when a box does not overlap the crop region
for (int i = 0; i < *bboxCount; i++) {
int bb_Xmin, bb_Xmax, bb_Ymin, bb_Ymax;
(*bboxList)->GetUnsignedIntAt(&bb_Xmin_t, {i, 0});
(*bboxList)->GetUnsignedIntAt(&bb_Ymin_t, {i, 1});
(*bboxList)->GetUnsignedIntAt(&bb_Xmax_t, {i, 2});
(*bboxList)->GetUnsignedIntAt(&bb_Ymax_t, {i, 3});
// copy into signed ints so the subtractions below can go negative without wrapping
bb_Xmin = bb_Xmin_t;
bb_Ymin = bb_Ymin_t;
bb_Xmax = bb_Xmax_t;
bb_Ymax = bb_Ymax_t;
// columns 2/3 hold width/height: convert them to absolute max-corner coordinates
bb_Xmax = bb_Xmin + bb_Xmax;
bb_Ymax = bb_Ymin + bb_Ymax;
// check for image / BB overlap
if (((bb_Xmin > *CB_Xmax) || (bb_Ymin > *CB_Ymax)) || ((bb_Xmax < *CB_Xmin) || (bb_Ymax < *CB_Ymin))) {
retFlag = true; // no overlap found
if (retFlag) { // invalid bbox no longer within image region - reset to zero
// Update this bbox and select it to move to the final output tensor
// adjust BBox corners by bringing into new CropBox if beyond
// Also resetting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin
bb_Xmin = bb_Xmin - (std::min(0, (bb_Xmin - *CB_Xmin)) + *CB_Xmin);
bb_Xmax = bb_Xmax - (std::max(0, (bb_Xmax - *CB_Xmax)) + *CB_Xmin);
bb_Ymin = bb_Ymin - (std::min(0, (bb_Ymin - *CB_Ymin)) + *CB_Ymin);
bb_Ymax = bb_Ymax - (std::max(0, (bb_Ymax - *CB_Ymax)) + *CB_Ymin);
// reset min values and calculate width/height from Box corners
(*bboxList)->SetItemAt({i, 0}, (uint32_t)(bb_Xmin));
(*bboxList)->SetItemAt({i, 1}, (uint32_t)(bb_Ymin));
(*bboxList)->SetItemAt({i, 2}, (uint32_t)(bb_Xmax - bb_Xmin));
(*bboxList)->SetItemAt({i, 3}, (uint32_t)(bb_Ymax - bb_Ymin));
// create new tensor and copy over bboxes still valid to the image
// bboxes outside of new cropped region are ignored - empty tensor returned in case of none
*bboxCount = correctInd.size();
uint32_t temp;
for (auto slice : correctInd) { // for every index in the loop
for (int ix = 0; ix < bboxDim; ix++) {
(*bboxList)->GetUnsignedIntAt(&temp, {slice, ix});
std::shared_ptr<Tensor> retV;
// NOTE(review): bboxCount is a size_t* in this function, so (dsize_t)bboxCount converts the pointer value
// itself rather than the count assigned to *bboxCount above - presumably (dsize_t)(*bboxCount) was intended.
Tensor::CreateTensor(&retV, copyVals, TensorShape({(dsize_t)bboxCount, bboxDim}));
(*bboxList) = retV; // reset pointer
// Shifts every bounding box in *bboxList to account for padding added to the image:
// column 0 (x-min) is increased by *pad_left and column 1 (y-min) by *pad_top. Columns 2 and 3 are not touched.
// The tensor is modified in place through SetItemAt.
void PadBBoxes(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *pad_top, int32_t *pad_left) {
uint32_t xMin = 0;
uint32_t yMin = 0;
for (int i = 0; i < *bboxCount; i++) {
(*bboxList)->GetUnsignedIntAt(&xMin, {i, 0});
(*bboxList)->GetUnsignedIntAt(&yMin, {i, 1});
// the pad amounts are cast to unsigned before the add, so a negative value would wrap to a huge offset
xMin = xMin + (uint32_t)(*pad_left); // should not be negative
yMin = yMin + (uint32_t)(*pad_top);
(*bboxList)->SetItemAt({i, 0}, xMin);
(*bboxList)->SetItemAt({i, 1}, yMin);
// Scales every bounding box in *bboxList by the ratio between the target size and the original size:
// columns 0 and 2 are multiplied by target_width / orig_width, columns 1 and 3 by target_height / orig_height.
// The tensor is modified in place through SetItemAt.
void UpdateBBoxesForResize(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *target_width_,
int32_t *target_height_, int *orig_width, int *orig_height) {
uint32_t bb_Xmin, bb_Ymin, bb_Xwidth, bb_Ywidth;
// promote to double to preserve the fractional part of the ratios
// NOTE(review): nothing here guards against *orig_width or *orig_height being 0 - confirm callers ensure that.
double W_aspRatio = (*target_width_ * 1.0) / (*orig_width * 1.0);
double H_aspRatio = (*target_height_ * 1.0) / (*orig_height * 1.0);
for (int i = 0; i < *bboxCount; i++) {
// for each bounding box
(*bboxList)->GetUnsignedIntAt(&bb_Xmin, {i, 0});
(*bboxList)->GetUnsignedIntAt(&bb_Ymin, {i, 1});
(*bboxList)->GetUnsignedIntAt(&bb_Xwidth, {i, 2});
(*bboxList)->GetUnsignedIntAt(&bb_Ywidth, {i, 3});
// update positions and widths
// the double products are assigned back to uint32_t, so the fractional part is dropped
bb_Xmin = bb_Xmin * W_aspRatio;
bb_Ymin = bb_Ymin * H_aspRatio;
bb_Xwidth = bb_Xwidth * W_aspRatio;
bb_Ywidth = bb_Ywidth * H_aspRatio;
// reset bounding box values
(*bboxList)->SetItemAt({i, 0}, (uint32_t)bb_Xmin);
(*bboxList)->SetItemAt({i, 1}, (uint32_t)bb_Ymin);
(*bboxList)->SetItemAt({i, 2}, (uint32_t)bb_Xwidth);
(*bboxList)->SetItemAt({i, 3}, (uint32_t)bb_Ywidth);
} // namespace dataset
} // namespace mindspore

View File

@ -225,7 +225,39 @@ Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outp
Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const int32_t &pad_top,
const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types,
uint8_t fill_r = 0, uint8_t fill_g = 0, uint8_t fill_b = 0);
// -------- BBOX OPERATIONS -------- //
// Updates and checks bounding boxes for new cropped region of image
// @param bboxList: A tensor containing bounding box tensors
// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop
// @param CB_Xmin: Image's CropBox Xmin coordinate
// @param CB_Ymin: Image's CropBox Ymin coordinate
// @param CB_Xmax: Image's CropBox Xmax coordinate - (Xmin + width)
// @param CB_Ymax: Image's CropBox Ymax coordinate - (Ymin + height)
void UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int *CB_Xmin, int *CB_Ymin, int *CB_Xmax,
int *CB_Ymax);
// Updates bounding boxes with required Top and Left padding
// Top and Left padding amounts are required to adjust the bboxes' min X,Y values according to the padding 'push'
// Top/Left since the image's 0,0 coordinate is taken from the top left
// @param bboxList: A tensor containing bounding box tensors
// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop
// @param pad_top: Total amount of padding applied to image top
// @param pad_left: Total amount of padding applied to image left side
void PadBBoxes(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *pad_top, int32_t *pad_left);
// Updates bounding boxes for an Image Resize Operation - Takes in set of valid BBoxes
// For e.g those that remain after a crop
// @param bboxList: A tensor containing bounding box tensors
// @param bboxCount: total Number of bounding boxes - required within caller function to run update loop
// @param target_width_: required width of image post resize
// @param target_height_: required height of image post resize
// @param orig_width: current width of image pre resize
// @param orig_height: current height of image pre resize
void UpdateBBoxesForResize(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount, int32_t *target_width_,
int32_t *target_height_, int *orig_width, int *orig_height);
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,58 @@
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include <random>
#include <utility>
#include "dataset/util/random.h"
#include "dataset/util/status.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/kernels/image/random_crop_and_resize_with_bbox_op.h"
namespace mindspore {
namespace dataset {
// Crops and resizes the image in input[0] and updates the bounding boxes in input[1] to match.
// The boxes are first adjusted to the selected crop box (UpdateBBoxesForCrop) and then scaled from the crop
// size to the target size (UpdateBBoxesForResize).
Status RandomCropAndResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Size() >= 2, "The shape of input is abnormal");
(*output).push_back(nullptr); // init memory for return vector
// NOTE(review): input is a const reference, so std::move here presumably degrades to a copy of the handle and
// the box helpers below then write into the same tensor the caller passed in - confirm that is acceptable.
(*output)[1] = std::move(input[1]); // move boxes over to output
size_t bboxCount = input[1]->shape()[0]; // number of rows in bbox tensor
int h_in = input[0]->shape()[0];
int w_in = input[0]->shape()[1];
int x = 0;
int y = 0;
int crop_height = 0;
int crop_width = 0;
// the value returned by GetCropBox is explicitly discarded by the (void) cast
(void)RandomCropAndResizeOp::GetCropBox(h_in, w_in, &x, &y, &crop_height, &crop_width);
int maxX = x + crop_width; // max dims of selected CropBox on image
int maxY = y + crop_height;
UpdateBBoxesForCrop(&(*output)[1], &bboxCount, &x, &y, &maxX, &maxY); // IMAGE_UTIL
RETURN_IF_NOT_OK(CropAndResize(input[0], &(*output)[0], x, y, crop_height, crop_width, target_height_, target_width_,
UpdateBBoxesForResize(&(*output)[1], &bboxCount, &target_width_, &target_height_, &crop_width, &crop_height);
return Status::OK();
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,46 @@
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include "dataset/kernels/image/random_crop_and_resize_op.h"
namespace mindspore {
namespace dataset {
// Variant of RandomCropAndResizeOp whose Compute works on a whole TensorRow.
// Construction is forwarded unchanged to the base class; only Print and Compute are overridden here.
class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp {
// Constructor for RandomCropAndResizeWithBBoxOp, with default value and passing to base class constructor
RandomCropAndResizeWithBBoxOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb,
float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb,
float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation,
int32_t max_iter = kDefMaxIter)
: RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation,
max_iter) {}
~RandomCropAndResizeWithBBoxOp() override = default;
// Writes the op name followed by the target height and target width to `out`
void Print(std::ostream &out) const override {
out << "RandomCropAndResizeWithBBox: " << RandomCropAndResizeOp::target_height_ << " "
<< RandomCropAndResizeOp::target_width_;
// Row-based entry point; defined in the matching .cc file
Status Compute(const TensorRow &input, TensorRow *output) override;
} // namespace dataset
} // namespace mindspore

View File

@ -48,44 +48,81 @@ RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_
// Pads `input` by the configured pad_top_/pad_bottom_/pad_left_/pad_right_ amounts and, when pad_if_needed_ is
// set, pads again on both sides of any dimension that is still smaller than the crop size.
// @param input: image tensor to pad
// @param pad_image: receives the padded image
// @param t_pad_top, t_pad_bottom, t_pad_left, t_pad_right: receive the total padding applied to each side
// @param padded_image_w, padded_image_h: receive shape()[1] and shape()[0] of the padded image
// @param crop_further: set to false when the padded image already has exactly the crop size
// @return Status::OK() on success; kShapeMisMatch when the crop size is zero or larger than the padded image
Status RandomCropOp::ImagePadding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *pad_image,
                                  int32_t *t_pad_top, int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right,
                                  int32_t *padded_image_w, int32_t *padded_image_h, bool *crop_further) {
  *t_pad_top = pad_top_;
  *t_pad_bottom = pad_bottom_;
  *t_pad_left = pad_left_;
  *t_pad_right = pad_right_;

  RETURN_IF_NOT_OK(
    Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_));
  CHECK_FAIL_RETURN_UNEXPECTED((*pad_image)->shape().Size() >= 2, "Abnormal shape");

  *padded_image_h = (*pad_image)->shape()[0];
  *padded_image_w = (*pad_image)->shape()[1];

  if (*padded_image_h == crop_height_ && *padded_image_w == crop_width_) {
    *crop_further = false;  // no need for further crop
    return Status::OK();
  } else if (pad_if_needed_) {
    // check the dimensions of the image for padding, if we do need padding, then we change the pad values
    if (*padded_image_h < crop_height_) {
      RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, crop_height_ - *padded_image_h, crop_height_ - *padded_image_h, 0, 0,
                           border_type_, fill_r_, fill_g_, fill_b_));
      // update pad total above/below
      // The totals are out-parameters: they must be updated through the pointer. `t_pad_top += n` would only
      // advance the local pointer and leave the caller's value unchanged.
      *t_pad_top += (crop_height_ - *padded_image_h);
      *t_pad_bottom += (crop_height_ - *padded_image_h);
    }
    if (*padded_image_w < crop_width_) {
      RETURN_IF_NOT_OK(Pad(*pad_image, pad_image, 0, 0, crop_width_ - *padded_image_w, crop_width_ - *padded_image_w,
                           border_type_, fill_r_, fill_g_, fill_b_));
      // update pad total left/right
      *t_pad_left += (crop_width_ - *padded_image_w);
      *t_pad_right += (crop_width_ - *padded_image_w);
    }
    // re-read the dimensions after the extra padding
    *padded_image_h = (*pad_image)->shape()[0];
    *padded_image_w = (*pad_image)->shape()[1];
  }

  if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) {
    return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__,
                  "Crop size is greater than the image dimensions or is zero.");
  }
  return Status::OK();
}
// Draws the top-left corner of the crop window: *x is uniform in [0, *padded_image_w - crop_width_] and
// *y is uniform in [0, *padded_image_h - crop_height_], both drawn from rnd_ (x first, then y).
// NOTE(review): the upper bounds go negative if the padded image is smaller than the crop size; this presumably
// relies on the size check in ImagePadding having run first - confirm.
void RandomCropOp::GenRandomXY(int *x, int *y, int32_t *padded_image_w, int32_t *padded_image_h) {
// GenCropPoints for cropping
*x = std::uniform_int_distribution<int>(0, *padded_image_w - crop_width_)(rnd_);
*y = std::uniform_int_distribution<int>(0, *padded_image_h - crop_height_)(rnd_);
Status RandomCropOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
// Apply padding first then crop
std::shared_ptr<Tensor> pad_image;
int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right;
int32_t padded_image_w;
int32_t padded_image_h;
bool crop_further = true; // whether image needs further cropping based on new size & requirements
Pad(input, &pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_));
CHECK_FAIL_RETURN_UNEXPECTED(pad_image->shape().Size() >= 2, "Abnormal shape");
int32_t padded_image_h = pad_image->shape()[0];
int32_t padded_image_w = pad_image->shape()[1];
// no need to crop if same size
if (padded_image_h == crop_height_ && padded_image_w == crop_width_) {
RETURN_IF_NOT_OK( // error code sent back directly
ImagePadding(input, &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right, &padded_image_w,
&padded_image_h, &crop_further));
if (!crop_further) {
*output = pad_image;
return Status::OK();
if (pad_if_needed_) {
// check the dimensions of the image for padding, if we do need padding, then we change the pad values
if (padded_image_h < crop_height_) {
RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, crop_height_ - padded_image_h, crop_height_ - padded_image_h, 0, 0,
border_type_, fill_r_, fill_g_, fill_b_));
if (padded_image_w < crop_width_) {
RETURN_IF_NOT_OK(Pad(pad_image, &pad_image, 0, 0, crop_width_ - padded_image_w, crop_width_ - padded_image_w,
border_type_, fill_r_, fill_g_, fill_b_));
padded_image_h = pad_image->shape()[0];
padded_image_w = pad_image->shape()[1];
if (padded_image_h < crop_height_ || padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) {
return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__,
"Crop size is greater than the image dimensions or is zero.");
// random top corner
int x = std::uniform_int_distribution<int>(0, padded_image_w - crop_width_)(rnd_);
int y = std::uniform_int_distribution<int>(0, padded_image_h - crop_height_)(rnd_);
int x, y;
GenRandomXY(&x, &y, &padded_image_w, &padded_image_h);
return Crop(pad_image, output, x, y, crop_width_, crop_height_);
Status RandomCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));

View File

@ -50,11 +50,20 @@ class RandomCropOp : public TensorOp {
void Print(std::ostream &out) const override { out << "RandomCropOp: " << crop_height_ << " " << crop_width_; }
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
Status ImagePadding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *pad_image, int32_t *t_pad_top,
int32_t *t_pad_bottom, int32_t *t_pad_left, int32_t *t_pad_right, int32_t *padded_image_w,
int32_t *padded_image_h, bool *crop_further);
void GenRandomXY(int *x, int *y, int32_t *padded_image_w, int32_t *padded_image_h);
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
int32_t crop_height_ = 0;
int32_t crop_width_ = 0;
int32_t pad_top_ = 0;
int32_t pad_bottom_ = 0;
int32_t pad_left_ = 0;

View File

@ -0,0 +1,67 @@
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include <random>
#include <algorithm>
#include <utility>
#include "dataset/kernels/image/random_crop_with_bbox_op.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/util/random.h"
#include "dataset/util/status.h"
namespace mindspore {
namespace dataset {
// Pads and randomly crops the image in input[0] and keeps the bounding boxes in input[1] consistent with it.
// Boxes are first shifted by the top/left padding, then adjusted to the randomly chosen crop window.
// @param input: row holding the image at index 0 and the bounding-box tensor at index 1
// @param output: row receiving the processed image at index 0 and the updated boxes at index 1
// @return the Status of the padding / crop steps
Status RandomCropWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) {
  IO_CHECK_VECTOR(input, output);

  std::shared_ptr<Tensor> pad_image;
  int32_t t_pad_top, t_pad_bottom, t_pad_left, t_pad_right;
  size_t boxCount = input[1]->shape()[0];  // number of rows

  int32_t padded_image_h;
  int32_t padded_image_w;

  // NOTE(review): the lines visible here index (*output)[0] and (*output)[1] without growing *output first;
  // this assumes the row already has two slots - confirm.
  (*output)[1] = std::move(input[1]);  // since some boxes may be removed

  bool crop_further = true;  // Whether further cropping will be required or not, true unless required size matches
  RETURN_IF_NOT_OK(          // Error passed back to caller
    RandomCropOp::ImagePadding(input[0], &pad_image, &t_pad_top, &t_pad_bottom, &t_pad_left, &t_pad_right,
                               &padded_image_w, &padded_image_h, &crop_further));

  // update bounding boxes with new values based on relevant image padding
  // Only top and left padding move the boxes' min corner, so those are the two amounts tested, and they are
  // passed in the order PadBBoxes declares them: (pad_top, pad_left).
  if (t_pad_left || t_pad_top) {
    PadBBoxes(&(*output)[1], &boxCount, &t_pad_top, &t_pad_left);
  }

  if (!crop_further) {
    // no further cropping required
    (*output)[0] = pad_image;
    (*output)[1] = std::move(input[1]);
    return Status::OK();
  }

  int x, y;
  RandomCropOp::GenRandomXY(&x, &y, &padded_image_w, &padded_image_h);
  int maxX = x + RandomCropOp::crop_width_;  // max dims of selected CropBox on image
  int maxY = y + RandomCropOp::crop_height_;
  UpdateBBoxesForCrop(&(*output)[1], &boxCount, &x, &y, &maxX, &maxY);
  return Crop(pad_image, &(*output)[0], x, y, RandomCropOp::crop_width_, RandomCropOp::crop_height_);
}
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,48 @@
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include <memory>
#include <vector>
#include "dataset/kernels/image/random_crop_op.h"
namespace mindspore {
namespace dataset {
// Variant of RandomCropOp whose Compute works on a whole TensorRow.
// Construction is forwarded unchanged to the base class; only Print and Compute are overridden here.
class RandomCropWithBBoxOp : public RandomCropOp {
// Constructor for RandomCropWithBBoxOp, with default value and passing to base class constructor
RandomCropWithBBoxOp(int32_t crop_height, int32_t crop_width, int32_t pad_top = kDefPadTop,
int32_t pad_bottom = kDefPadBottom, int32_t pad_left = kDefPadLeft,
int32_t pad_right = kDefPadRight, BorderType border_types = kDefBorderType,
bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG,
uint8_t fill_b = kDefFillB)
: RandomCropOp(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, border_types, pad_if_needed,
fill_r, fill_g, fill_b) {}
~RandomCropWithBBoxOp() override = default;
// Writes the op name followed by the crop height and crop width to `out`
void Print(std::ostream &out) const override {
out << "RandomCropWithBBoxOp: " << RandomCropOp::crop_height_ << " " << RandomCropOp::crop_width_;
// Row-based entry point; defined in the matching .cc file
Status Compute(const TensorRow &input, TensorRow *output) override;
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,58 @@
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include <utility>
#include "dataset/util/status.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/kernels/image/random_vertical_flip_with_bbox_op.h"
namespace mindspore {
namespace dataset {
const float RandomVerticalFlipWithBBoxOp::kDefProbability = 0.5;
// With the probability configured in distribution_, flips the image in input[0] vertically and rewrites the
// y-min (column 1) of every bounding box in input[1]; otherwise the input row is copied to the output as is.
Status RandomVerticalFlipWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
if (distribution_(rnd_)) {
dsize_t imHeight = input[0]->shape()[0];
size_t boxCount = input[1]->shape()[0]; // number of rows in tensor
// one time allocation -> updated in the loop
// type defined based on VOC test dataset
for (int i = 0; i < boxCount; i++) {
uint32_t boxCorner_y = 0;
uint32_t boxHeight = 0;
uint32_t newBoxCorner_y = 0;
input[1]->GetUnsignedIntAt(&boxCorner_y, {i, 1}); // get min y of bbox
input[1]->GetUnsignedIntAt(&boxHeight, {i, 3}); // get height of bbox
// subtract (curCorner + height) from (max) for new Corner position
// NOTE(review): the result is stored in a uint32_t, so a box whose y + height exceeds imHeight - 1 would wrap
// instead of going negative - confirm boxes are validated upstream.
// NOTE(review): if a box covers rows [y, y + height - 1], its mirrored top row is imHeight - (y + height),
// which is one more than this expression yields - confirm the intended box convention.
newBoxCorner_y = (imHeight - 1) - (boxCorner_y + boxHeight);
// the caller's bounding-box tensor is updated in place here
input[1]->SetItemAt({i, 1}, newBoxCorner_y);
(*output)[1] = std::move(input[1]);
return VerticalFlip(input[0], &(*output)[0]);
*output = input;
return Status::OK();
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,52 @@
* Copyright 2020 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
#include <memory>
#include <random>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
#include "dataset/util/status.h"
#include "dataset/util/random.h"
namespace mindspore {
namespace dataset {
// Tensor op that flips an image vertically with a given probability and adjusts the bounding boxes to match.
class RandomVerticalFlipWithBBoxOp : public TensorOp {
// Default values, also used by the Python bindings as the default for the "probability" argument
static const float kDefProbability;
// Constructor for RandomVerticalFlipWithBBoxOp
// @param probability: Probability of Image flipping, 0.5 by default
explicit RandomVerticalFlipWithBBoxOp(float probability = kDefProbability) : distribution_(probability) {
~RandomVerticalFlipWithBBoxOp() override = default;
void Print(std::ostream &out) const override { out << "RandomVerticalFlipWithBBoxOp"; }
Status Compute(const TensorRow &input, TensorRow *output) override;
// random engine fed to distribution_ in Compute
std::mt19937 rnd_;
// Bernoulli trial built from the constructor's probability; decides whether Compute flips
std::bernoulli_distribution distribution_;
} // namespace dataset
} // namespace mindspore

View File

@ -149,6 +149,54 @@ class RandomCrop(cde.RandomCropOp):
super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value)
class RandomCropWithBBox(cde.RandomCropWithBBoxOp):
Crop the input image at a random location, and adjust bounding boxes
size (int or sequence): The output size of the cropped image.
If size is an int, a square crop of size (size, size) is returned.
If size is a sequence of length 2, it should be (height, width).
padding (int or sequence, optional): The number of pixels to pad the image (default=None).
If padding is not None, pad image firstly with padding values.
If a single number is provided, it pads all borders with this value.
If a tuple or list of 2 values are provided, it pads the (left and top)
with the first value and (right and bottom) with the second value.
If 4 values are provided as a list or tuple, it pads the left, top, right and bottom respectively.
pad_if_needed (bool, optional): Pad the image if either side is smaller than
the given output size (default=False).
fill_value (int or tuple, optional): The pixel intensity of the borders if
the padding_mode is Border.CONSTANT (default=0). If it is a 3-tuple, it is used to
fill R, G, B channels respectively.
padding_mode (Border mode, optional): The method of padding (default=Border.CONSTANT). Can be any of
[Border.CONSTANT, Border.EDGE, Border.REFLECT, Border.SYMMETRIC].
- Border.CONSTANT, means it fills the border with constant values.
- Border.EDGE, means it pads with the last value on the edge.
- Border.REFLECT, means it reflects the values on the edge omitting the last
value of edge.
- Border.SYMMETRIC, means it reflects the values on the edge repeating the last
value of edge.
def __init__(self, size, padding=None, pad_if_needed=False, fill_value=0, padding_mode=Border.CONSTANT):
self.size = size
self.padding = padding
self.pad_if_needed = pad_if_needed
self.fill_value = fill_value
self.padding_mode = padding_mode.value
if padding is None:
padding = (0, 0, 0, 0)
if isinstance(fill_value, int): # temporary fix
fill_value = tuple([fill_value] * 3)
border_type = DE_C_BORDER_TYPE[padding_mode]
super().__init__(*size, *padding, border_type, pad_if_needed, *fill_value)
class RandomHorizontalFlip(cde.RandomHorizontalFlipOp):
Flip the input image horizontally, randomly with a given probability.
@ -192,6 +240,20 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp):
class RandomVerticalFlipWithBBox(cde.RandomVerticalFlipWithBBoxOp):
Flip the input image vertically and adjust bounding boxes, randomly with a given probability.
prob (float): Probability of the image being flipped (default=0.5).
def __init__(self, prob=0.5):
# NOTE(review): the sibling wrappers RandomCropWithBBox and RandomResizedCropWithBBox end __init__ with a
# super().__init__(...) call that forwards their arguments to the C++ op; no such call is visible here.
# Confirm super().__init__(prob) is present, otherwise prob is only stored on the Python object.
self.prob = prob
class BoundingBoxAug(cde.BoundingBoxAugOp):
Flip the input image vertically, randomly with a given probability.
@ -237,6 +299,42 @@ class Resize(cde.ResizeOp):
super().__init__(*size, interpoltn)
class RandomResizedCropWithBBox(cde.RandomCropAndResizeWithBBoxOp):
Crop the input image to a random size and aspect ratio and adjust the Bounding Boxes accordingly
size (int or sequence): The size of the output image.
If size is an int, a square crop of size (size, size) is returned.
If size is a sequence of length 2, it should be (height, width).
scale (tuple, optional): Range (min, max) of respective size of the original
size to be cropped (default=(0.08, 1.0)).
ratio (tuple, optional): Range (min, max) of aspect ratio to be cropped
(default=(3. / 4., 4. / 3.)).
interpolation (Inter mode, optional): Image interpolation mode (default=Inter.BILINEAR).
It can be any of [Inter.BILINEAR, Inter.NEAREST, Inter.BICUBIC].
- Inter.BILINEAR, means interpolation method is bilinear interpolation.
- Inter.NEAREST, means interpolation method is nearest-neighbor interpolation.
- Inter.BICUBIC, means interpolation method is bicubic interpolation.
max_attempts (int, optional): The maximum number of attempts to propose a valid
crop_area (default=10). If exceeded, fall back to use center_crop instead.
def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
interpolation=Inter.BILINEAR, max_attempts=10):
self.size = size
self.scale = scale
self.ratio = ratio
self.interpolation = interpolation
self.max_attempts = max_attempts
interpoltn = DE_C_INTER_MODE[interpolation]
super().__init__(*size, *scale, *ratio, interpoltn, max_attempts)
class RandomResizedCrop(cde.RandomCropAndResizeOp):
Crop the input image to a random size and aspect ratio.