From add29f95f54b220c5a8724c2fa91b9a01a93534d Mon Sep 17 00:00:00 2001 From: Mahdi Date: Fri, 12 Jun 2020 15:12:55 -0400 Subject: [PATCH] resolved issues in the comments --- .../image/random_crop_and_resize_op.cc | 53 ++++++++++------- .../kernels/image/random_crop_and_resize_op.h | 2 + .../dataset/random_crop_and_resize_op_test.cc | 55 ++++++++++++++++-- .../random_crop_and_resize_01_c_result.npz | Bin 644 -> 644 bytes .../random_crop_and_resize_01_py_result.npz | Bin 644 -> 644 bytes .../random_crop_and_resize_02_c_result.npz | Bin 644 -> 644 bytes .../random_crop_and_resize_03_c_result.npz | Bin 644 -> 644 bytes .../dataset/test_random_crop_and_resize.py | 22 +++++-- 8 files changed, 101 insertions(+), 31 deletions(-) diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc index a3cf8cefb50..c5b5f20c638 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc @@ -35,8 +35,10 @@ RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t targ : target_height_(target_height), target_width_(target_width), rnd_scale_(scale_lb, scale_ub), - rnd_aspect_(aspect_lb, aspect_ub), + rnd_aspect_(log(aspect_lb), log(aspect_ub)), interpolation_(interpolation), + aspect_lb_(aspect_lb), + aspect_ub_(aspect_ub), max_iter_(max_iter) { rnd_.seed(GetSeed()); } @@ -64,33 +66,42 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector &inputs return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); } Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) { - double scale, aspect; *crop_width = w_in; *crop_height = h_in; - bool crop_success = false; + CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); + CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "Height is 0"); + CHECK_FAIL_RETURN_UNEXPECTED(aspect_lb_ > 0, "Aspect lower bound must be greater than zero"); for (int32_t i = 0; i < max_iter_; i++) { - scale = rnd_scale_(rnd_); - aspect = rnd_aspect_(rnd_); - *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * scale / aspect))); - *crop_height = static_cast(std::round(*crop_width * aspect)); + double const sample_scale = rnd_scale_(rnd_); + // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale. + // Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic sample_scale. + double const sample_aspect = exp(rnd_aspect_(rnd_)); + + *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect))); + *crop_height = static_cast(std::round(*crop_width / sample_aspect)); if (*crop_width <= w_in && *crop_height <= h_in) { - crop_success = true; - break; + std::uniform_int_distribution<> rd_x(0, w_in - *crop_width); + std::uniform_int_distribution<> rd_y(0, h_in - *crop_height); + *x = rd_x(rnd_); + *y = rd_y(rnd_); + return Status::OK(); } } - if (!crop_success) { - CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); - aspect = static_cast(h_in) / w_in; - scale = rnd_scale_(rnd_); - *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * scale / aspect))); - *crop_height = static_cast(std::round(*crop_width * aspect)); - *crop_height = (*crop_height > h_in) ? h_in : *crop_height; - *crop_width = (*crop_width > w_in) ? w_in : *crop_width; + double const img_aspect = static_cast(w_in) / h_in; + if (img_aspect < aspect_lb_) { + *crop_width = w_in; + *crop_height = static_cast(std::round(*crop_width / static_cast(aspect_lb_))); + } else { + if (img_aspect > aspect_ub_) { + *crop_height = h_in; + *crop_width = static_cast(std::round(*crop_height * static_cast(aspect_ub_))); + } else { + *crop_width = w_in; + *crop_height = h_in; + } } - std::uniform_int_distribution<> rd_x(0, w_in - *crop_width); - std::uniform_int_distribution<> rd_y(0, h_in - *crop_height); - *x = rd_x(rnd_); - *y = rd_y(rnd_); + *x = static_cast(std::round((w_in - *crop_width) / 2.0)); + *y = static_cast(std::round((h_in - *crop_height) / 2.0)); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h index 97ee9f60926..db805a9374a 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h @@ -60,6 +60,8 @@ class RandomCropAndResizeOp : public TensorOp { std::mt19937 rnd_; InterpolationMode interpolation_; int32_t max_iter_; + double aspect_lb_; + double aspect_ub_; }; } // namespace dataset } // namespace mindspore diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc index 7be18fb02ce..3d5298b0718 100644 --- a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc @@ -28,17 +28,16 @@ class MindDataTestRandomCropAndResizeOp : public UT::CVOP::CVOpCommon { public: MindDataTestRandomCropAndResizeOp() : CVOpCommon() {} }; - -TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest) { +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest1) { MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; TensorShape s_in = input_tensor_->shape(); std::shared_ptr output_tensor; int h_out = 1024; int w_out = 2048; - float aspect_lb = 0.2; - float aspect_ub = 5; - float scale_lb = 0.0001; - float scale_ub = 1.0; + float aspect_lb = 2; + float aspect_ub = 2.5; + float scale_lb = 0.2; + float scale_ub = 2.0; TensorShape s_out({h_out, w_out, s_in[2]}); @@ -51,3 +50,47 @@ TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest) { MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; } +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest2) { + MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; + TensorShape s_in = input_tensor_->shape(); + std::shared_ptr output_tensor; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 1; + float aspect_ub = 1.5; + float scale_lb = 0.2; + float scale_ub = 2.0; + + TensorShape s_out({h_out, w_out, s_in[2]}); + + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto i = 0; i < 100; i++) { + s = op->Compute(input_tensor_, &output_tensor); + EXPECT_TRUE(s.IsOk()); + } + + MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; +} +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest3) { + MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; + TensorShape s_in = input_tensor_->shape(); + std::shared_ptr output_tensor; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 0.2; + float aspect_ub = 3; + float scale_lb = 0.2; + float scale_ub = 2.0; + + TensorShape s_out({h_out, w_out, s_in[2]}); + + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto i = 0; i < 100; i++) { + s = op->Compute(input_tensor_, &output_tensor); + EXPECT_TRUE(s.IsOk()); + } + + MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; +} \ No newline at end of file diff --git a/tests/ut/data/dataset/golden/random_crop_and_resize_01_c_result.npz b/tests/ut/data/dataset/golden/random_crop_and_resize_01_c_result.npz index 9deccf5eec53e39e47a1f6ccee341efb844d25bc..7952076673371563bb180a9d5f4511c104ff3329 100644 GIT binary patch delta 99 zcmZo+ZDAD(@MdNaVSoTdhIc_;|27IOU=&d9|C=GVQr`ZH3hSW^76;EM9E0 y?Baf#SIjLf$rcTM%O-yV3U=o%GR$Lc*!U=W(c=ZXC&W+IVv=P6nKRj&$r1oe$Ry?f delta 99 zcmZo+ZDAD(@MdNaVSoTdh7}*!LpKU7U=*14c&?fLrj_$1YCfIzF6dU-1)+ic^Jqndu}~naN^`|KtaxJTpT8Hzi#hYFuU-S##Z^sT1>JmAaf>rGg$%vTAU+! diff --git a/tests/ut/data/dataset/golden/random_crop_and_resize_01_py_result.npz b/tests/ut/data/dataset/golden/random_crop_and_resize_01_py_result.npz index 67459b2e824bad588b2b0b7c322b95d742c0bba8..bd5e6a83f0e25a8315d60238091f102ef11f1db0 100644 GIT binary patch delta 99 zcmZo+ZDAD(@MdNaVSoTd2E9!CTN{NIFbb4z@K1K{S2nhJvsd`}SHnM(4>76;biHs9 ycx;>1w|>{nwi=};ER(+h1vOT^c~%({xI}BR!{r+H4Qi9Mm}FT%=1lfxvIGE92qYK) delta 99 zcmZo+ZDAD(@MdNaVSoTd2LJ1K;x-B`U=+};ov?P9{^?IUk5q`OmNnT=KE$XZu#x9Q yhSI11(?W%>^nO!TZJzuMD0pJNy#3@$^Zx`!T0B?ftqGZ|#U#rDGH0?klO+I<$0Z^F diff --git a/tests/ut/data/dataset/golden/random_crop_and_resize_02_c_result.npz b/tests/ut/data/dataset/golden/random_crop_and_resize_02_c_result.npz index 6f1b0517698d5379f4a92a3d6f101462fa915f71..a8c5bf8e9844a0af9aa8a9a84b5fa11bcd6ab446 100644 GIT binary patch delta 99 zcmZo+ZDAD(@MdNaVSoTdhWdbnM;nC}FbWhoeQ*i9HpOH{CR>hF_&4RrhZt1^m|{(u xs;A2(&2;<7<1T5saPl{x;O6e7Z1=My=fzAsJ6XohVAEtRCRrAcIg`DaECIfSAmRW3 delta 99 zcmZo+ZDAD(@MdNaVSoTd2Hj1cg*FN;U=%R@CN0Y+81o=Dj?40n_UzA-4>76;RLMO5 xwzFs<|7RnWdrrkIJd?ix1#LX+Z%QgSCW${wENK_|DLYwJmAaf>rGg$%vwB9B) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index d5f84b3b991..ef7c8b71884 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -41,7 +41,8 @@ def test_random_crop_and_resize_op_c(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() - random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (1, 1), (0.5, 0.5)) + # With these inputs we expect the code to crop the whole image + random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)) data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) @@ -65,6 +66,7 @@ def test_random_crop_and_resize_op_c(plot=False): if plot: visualize(original_images, crop_and_resize_images) + def test_random_crop_and_resize_op_py(plot=False): """ Test RandomCropAndResize op in py transforms @@ -72,9 +74,10 @@ def test_random_crop_and_resize_op_py(plot=False): logger.info("test_random_crop_and_resize_op_py") # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + # With these inputs we expect the code to crop the whole image transforms1 = [ py_vision.Decode(), - py_vision.RandomResizedCrop((256, 512), (1, 1), (0.5, 0.5)), + py_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)), py_vision.ToTensor() ] transform1 = py_vision.ComposeOp(transforms1) @@ -96,6 +99,8 @@ def test_random_crop_and_resize_op_py(plot=False): original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = cv2.resize(original, (512, 256)) mse = diff_mse(crop_and_resize, original) + # Due to rounding error the mse for Python is not exactly 0 + assert mse <= 0.05 logger.info("random_crop_and_resize_op_{}, mse: {}".format(num_iter + 1, mse)) num_iter += 1 crop_and_resize_images.append(crop_and_resize) @@ -103,6 +108,7 @@ def test_random_crop_and_resize_op_py(plot=False): if plot: visualize(original_images, crop_and_resize_images) + def test_random_crop_and_resize_01(): """ Test RandomCropAndResize with md5 check, expected to pass @@ -114,7 +120,7 @@ def test_random_crop_and_resize_01(): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() - random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (0.5, 1), (0.5, 1)) + random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (0.5, 0.5), (1, 1)) data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) @@ -122,7 +128,7 @@ def test_random_crop_and_resize_01(): data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) transforms = [ py_vision.Decode(), - py_vision.RandomResizedCrop((256, 512), (0.5, 1), (0.5, 1)), + py_vision.RandomResizedCrop((256, 512), (0.5, 0.5), (1, 1)), py_vision.ToTensor() ] transform = py_vision.ComposeOp(transforms) @@ -137,6 +143,7 @@ def test_random_crop_and_resize_01(): ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + def test_random_crop_and_resize_02(): """ Test RandomCropAndResize with md5 check:Image interpolation mode is Inter.NEAREST, @@ -172,6 +179,7 @@ def test_random_crop_and_resize_02(): ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + def test_random_crop_and_resize_03(): """ Test RandomCropAndResize with md5 check: max_attempts is 1, expected to pass @@ -206,6 +214,7 @@ def test_random_crop_and_resize_03(): ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + def test_random_crop_and_resize_04_c(): """ Test RandomCropAndResize with c_tranforms: invalid range of scale (max