diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/cutmix_batch_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/cutmix_batch_op.cc index 43d4d8c6c9b..d4166545995 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/cutmix_batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/cutmix_batch_op.cc @@ -59,10 +59,17 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) { // Check inputs if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) { - RETURN_STATUS_UNEXPECTED("You must make sure images are HWC or CHW and batched before calling CutMixBatch."); + RETURN_STATUS_UNEXPECTED( + "CutMixBatch: You must make sure images are HWC or CHW and batched before calling CutMixBatch."); } - if (label_shape.size() != 2) { - RETURN_STATUS_UNEXPECTED("CutMixBatch: Label's must be in one-hot format and in a batch."); + if (!input.at(1)->type().IsInt()) { + RETURN_STATUS_UNEXPECTED("CutMixBatch: Wrong labels type. The second column (labels) must only include int types."); + } + if (label_shape.size() != 2 && label_shape.size() != 3) { + RETURN_STATUS_UNEXPECTED( + "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch " + "size, L is the number of labels in each row, " + "and C is the number of classes. labels must be in one-hot format and in a batch."); } if ((image_shape[1] != 1 && image_shape[1] != 3) && image_batch_format_ == ImageBatchFormat::kNCHW) { RETURN_STATUS_UNEXPECTED("CutMixBatch: Image doesn't match the given image format."); @@ -84,10 +91,12 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) { // Tensor holding the output labels std::shared_ptr out_labels; - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(label_shape), DataType(DataType::DE_FLOAT32), &out_labels)); + RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType(DataType::DE_FLOAT32))); + int64_t row_labels = label_shape.size() == 3 ? 
label_shape[1] : 1; + int64_t num_classes = label_shape.size() == 3 ? label_shape[2] : label_shape[1]; // Compute labels and images - for (int i = 0; i < image_shape[0]; i++) { + for (int64_t i = 0; i < image_shape[0]; i++) { // Calculating lambda // If x1 is a random variable from Gamma(a1, 1) and x2 is a random variable from Gamma(a2, 1) // then x = x1 / (x1+x2) is a random variable from Beta(a1, a2) @@ -138,22 +147,29 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) { } // Compute labels - for (int j = 0; j < label_shape[1]; j++) { - if (input.at(1)->type().IsSignedInt()) { - int64_t first_value, second_value; - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j})); - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i] % label_shape[0], j})); - RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, label_lam * first_value + (1 - label_lam) * second_value)); - } else { - uint64_t first_value, second_value; - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j})); - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i] % label_shape[0], j})); - RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, label_lam * first_value + (1 - label_lam) * second_value)); + + for (int64_t j = 0; j < row_labels; j++) { + for (int64_t k = 0; k < num_classes; k++) { + std::vector first_index = label_shape.size() == 3 ? std::vector{i, j, k} : std::vector{i, k}; + std::vector second_index = + label_shape.size() == 3 ? 
std::vector{rand_indx[i], j, k} : std::vector{rand_indx[i], k}; + if (input.at(1)->type().IsSignedInt()) { + int64_t first_value, second_value; + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index)); + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index)); + RETURN_IF_NOT_OK( + out_labels->SetItemAt(first_index, label_lam * first_value + (1 - label_lam) * second_value)); + } else { + uint64_t first_value, second_value; + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index)); + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index)); + RETURN_IF_NOT_OK( + out_labels->SetItemAt(first_index, label_lam * first_value + (1 - label_lam) * second_value)); + } } } } } - std::shared_ptr out_images; RETURN_IF_NOT_OK(TensorVectorToBatchTensor(images, &out_images)); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/mixup_batch_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/mixup_batch_op.cc index dcf91542299..8af17aab4d7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/mixup_batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/mixup_batch_op.cc @@ -38,10 +38,17 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) { // Check inputs if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) { - RETURN_STATUS_UNEXPECTED("You must make sure images are HWC or CHW and batched before calling MixUpBatch."); + RETURN_STATUS_UNEXPECTED( + "MixUpBatch:You must make sure images are HWC or CHW and batched before calling MixUpBatch."); } - if (label_shape.size() != 2) { - RETURN_STATUS_UNEXPECTED("MixUpBatch: Label's must be in one-hot format and in a batch."); + if (!input.at(1)->type().IsInt()) { + RETURN_STATUS_UNEXPECTED("MixUpBatch: Wrong labels type. The second column (labels) must only include int types."); + } + if (label_shape.size() != 2 && label_shape.size() != 3) { + RETURN_STATUS_UNEXPECTED( + "MixUpBatch: Wrong labels shape. 
The second column (labels) must have a shape of NC or NLC where N is the batch " + "size, L is the number of labels in each row, " + "and C is the number of classes. labels must be in one-hot format and in a batch."); } if ((image_shape[1] != 1 && image_shape[1] != 3) && (image_shape[3] != 1 && image_shape[3] != 3)) { RETURN_STATUS_UNEXPECTED("MixUpBatch: Images must be in the shape of HWC or CHW."); @@ -65,23 +72,31 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) { // Compute labels std::shared_ptr out_labels; - RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType("float32"))); + RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType(DataType::DE_FLOAT32))); + + int64_t row_labels = label_shape.size() == 3 ? label_shape[1] : 1; + int64_t num_classes = label_shape.size() == 3 ? label_shape[2] : label_shape[1]; + for (int64_t i = 0; i < label_shape[0]; i++) { - for (int64_t j = 0; j < label_shape[1]; j++) { - if (input.at(1)->type().IsSignedInt()) { - int64_t first_value, second_value; - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j})); - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i], j})); - RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, lam * first_value + (1 - lam) * second_value)); - } else { - uint64_t first_value, second_value; - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j})); - RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i], j})); - RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, lam * first_value + (1 - lam) * second_value)); + for (int64_t j = 0; j < row_labels; j++) { + for (int64_t k = 0; k < num_classes; k++) { + std::vector first_index = label_shape.size() == 3 ? std::vector{i, j, k} : std::vector{i, k}; + std::vector second_index = + label_shape.size() == 3 ? 
std::vector{rand_indx[i], j, k} : std::vector{rand_indx[i], k}; + if (input.at(1)->type().IsSignedInt()) { + int64_t first_value, second_value; + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index)); + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index)); + RETURN_IF_NOT_OK(out_labels->SetItemAt(first_index, lam * first_value + (1 - lam) * second_value)); + } else { + uint64_t first_value, second_value; + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index)); + RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index)); + RETURN_IF_NOT_OK(out_labels->SetItemAt(first_index, lam * first_value + (1 - lam) * second_value)); + } } } } - // Compute images for (int64_t i = 0; i < images.size(); i++) { TensorShape remaining({-1}); diff --git a/tests/ut/python/dataset/test_cutmix_batch_op.py b/tests/ut/python/dataset/test_cutmix_batch_op.py index 3c602be6592..ee7599e3390 100644 --- a/tests/ut/python/dataset/test_cutmix_batch_op.py +++ b/tests/ut/python/dataset/test_cutmix_batch_op.py @@ -27,6 +27,7 @@ from util import save_and_check_md5, diff_mse, visualize_list, config_get_set_se DATA_DIR = "../data/dataset/testCifar10Data" DATA_DIR2 = "../data/dataset/testImageNetData2/train/" +DATA_DIR3 = "../data/dataset/testCelebAData/" GENERATE_GOLDEN = False @@ -36,7 +37,6 @@ def test_cutmix_batch_success1(plot=False): Test CutMixBatch op with specified alpha and prob parameters on a batch of CHW images """ logger.info("test_cutmix_batch_success1") - # Original Images ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) ds_original = ds_original.batch(5, drop_remainder=True) @@ -164,6 +164,53 @@ def test_cutmix_batch_success3(plot=False): logger.info("MSE= {}".format(str(np.mean(mse)))) +def test_cutmix_batch_success4(plot=False): + """ + Test CutMixBatch on a dataset where OneHot returns a 2D vector + """ + logger.info("test_cutmix_batch_success4") + + ds_original = ds.CelebADataset(DATA_DIR3, 
shuffle=False) + decode_op = vision.Decode() + ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) + ds_original = ds_original.batch(2, drop_remainder=True) + + images_original = None + for idx, (image, _) in enumerate(ds_original): + if idx == 0: + images_original = image + else: + images_original = np.append(images_original, image, axis=0) + + # CutMix Images + data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False) + + decode_op = vision.Decode() + data1 = data1.map(input_columns=["image"], operations=[decode_op]) + + one_hot_op = data_trans.OneHot(num_classes=100) + data1 = data1.map(input_columns=["attr"], operations=one_hot_op) + + cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9) + data1 = data1.batch(2, drop_remainder=True) + data1 = data1.map(input_columns=["image", "attr"], operations=cutmix_batch_op) + + images_cutmix = None + for idx, (image, _) in enumerate(data1): + if idx == 0: + images_cutmix = image + else: + images_cutmix = np.append(images_cutmix, image, axis=0) + if plot: + visualize_list(images_original, images_cutmix) + + num_samples = images_original.shape[0] + mse = np.zeros(num_samples) + for i in range(num_samples): + mse[i] = diff_mse(images_cutmix[i], images_original[i]) + logger.info("MSE= {}".format(str(np.mean(mse)))) + + def test_cutmix_batch_nhwc_md5(): """ Test CutMixBatch on a batch of HWC images with MD5: @@ -368,7 +415,7 @@ def test_cutmix_batch_fail7(): images_cutmix = image else: images_cutmix = np.append(images_cutmix, image, axis=0) - error_message = "CutMixBatch: Label's must be in one-hot format and in a batch" + error_message = "CutMixBatch: Wrong labels shape. 
The second column (labels) must have a shape of NC or NLC" assert error_message in str(error.value) @@ -394,6 +441,7 @@ if __name__ == "__main__": test_cutmix_batch_success1(plot=True) test_cutmix_batch_success2(plot=True) test_cutmix_batch_success3(plot=True) + test_cutmix_batch_success4(plot=True) test_cutmix_batch_nchw_md5() test_cutmix_batch_nhwc_md5() test_cutmix_batch_fail1() diff --git a/tests/ut/python/dataset/test_mixup_op.py b/tests/ut/python/dataset/test_mixup_op.py index 84e9c02e2ff..381db7960fd 100644 --- a/tests/ut/python/dataset/test_mixup_op.py +++ b/tests/ut/python/dataset/test_mixup_op.py @@ -26,6 +26,7 @@ from util import save_and_check_md5, diff_mse, visualize_list, config_get_set_se DATA_DIR = "../data/dataset/testCifar10Data" DATA_DIR2 = "../data/dataset/testImageNetData2/train/" +DATA_DIR3 = "../data/dataset/testCelebAData/" GENERATE_GOLDEN = False @@ -162,6 +163,55 @@ def test_mixup_batch_success3(plot=False): logger.info("MSE= {}".format(str(np.mean(mse)))) +def test_mixup_batch_success4(plot=False): + """ + Test MixUpBatch op on a dataset where OneHot returns a 2D vector. 
+ Alpha parameter will be selected by default in this case + """ + logger.info("test_mixup_batch_success4") + + # Original Images + ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False) + decode_op = vision.Decode() + ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) + ds_original = ds_original.batch(2, drop_remainder=True) + + images_original = None + for idx, (image, _) in enumerate(ds_original): + if idx == 0: + images_original = image + else: + images_original = np.append(images_original, image, axis=0) + + # MixUp Images + data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) + + decode_op = vision.Decode() + data1 = data1.map(input_columns=["image"], operations=[decode_op]) + + one_hot_op = data_trans.OneHot(num_classes=100) + data1 = data1.map(input_columns=["attr"], operations=one_hot_op) + + mixup_batch_op = vision.MixUpBatch() + data1 = data1.batch(2, drop_remainder=True) + data1 = data1.map(input_columns=["image", "attr"], operations=mixup_batch_op) + + images_mixup = np.array([]) + for idx, (image, _) in enumerate(data1): + if idx == 0: + images_mixup = image + else: + images_mixup = np.append(images_mixup, image, axis=0) + if plot: + visualize_list(images_original, images_mixup) + + num_samples = images_original.shape[0] + mse = np.zeros(num_samples) + for i in range(num_samples): + mse[i] = diff_mse(images_mixup[i], images_original[i]) + logger.info("MSE= {}".format(str(np.mean(mse)))) + + def test_mixup_batch_md5(): """ Test MixUpBatch with MD5: @@ -218,7 +268,7 @@ def test_mixup_batch_fail1(): images_mixup = image else: images_mixup = np.append(images_mixup, image, axis=0) - error_message = "You must make sure images are HWC or CHW and batch" + error_message = "You must make sure images are HWC or CHW and batched" assert error_message in str(error.value) @@ -316,12 +366,50 @@ def test_mixup_batch_fail4(): assert error_message in str(error.value) +def test_mixup_batch_fail5(): + """ + Test MixUpBatch Fail 5 + We expect 
this to fail because labels are not OneHot encoded + """ + logger.info("test_mixup_batch_fail5") + + # Original Images + ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) + ds_original = ds_original.batch(5) + + images_original = np.array([]) + for idx, (image, _) in enumerate(ds_original): + if idx == 0: + images_original = image + else: + images_original = np.append(images_original, image, axis=0) + + # MixUp Images + data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) + + mixup_batch_op = vision.MixUpBatch() + data1 = data1.batch(5, drop_remainder=True) + data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) + + with pytest.raises(RuntimeError) as error: + images_mixup = np.array([]) + for idx, (image, _) in enumerate(data1): + if idx == 0: + images_mixup = image + else: + images_mixup = np.append(images_mixup, image, axis=0) + error_message = "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" + assert error_message in str(error.value) + + if __name__ == "__main__": test_mixup_batch_success1(plot=True) test_mixup_batch_success2(plot=True) test_mixup_batch_success3(plot=True) + test_mixup_batch_success4(plot=True) test_mixup_batch_md5() test_mixup_batch_fail1() test_mixup_batch_fail2() test_mixup_batch_fail3() test_mixup_batch_fail4() + test_mixup_batch_fail5()