fix PadToSize with Decode

2022-06-20 14:36:36 +08:00 · 2022-06-20 14:36:36 +08:00 · bf7fc7064d
parent 4492406797
commit bf7fc7064d
5 changed files with 57 additions and 32 deletions
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
@ -123,6 +123,32 @@ Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *siz
  return Status::OK();
 }

+// Verifies that the tensor dtype can be represented as an OpenCV type.
+// Returns an error status prefixed with op_name and naming the tensor dtype when it cannot.
+Status ValidateImageDtype(const std::string &op_name, DataType dtype) {
+  if (dtype.AsCVType() == kCVInvalidType) {
+    // Look the name up by the tensor dtype: the CV code is the invalid sentinel in this branch, not a dtype index.
+    const std::string type_name =
+      dtype.value() < DataType::NUM_OF_TYPES ? std::string(DataType::kTypeInfo[dtype.value()].name_) : "unknown";
+    std::string err_msg = op_name + ": Cannot convert [" + type_name + "] to OpenCV type." +
+                          " Currently unsupported data type: [uint32, int64, uint64, string]";
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+  return Status::OK();
+}
+
+Status ValidateImageRank(const std::string &op_name, int32_t rank) {
+  if (rank != kMinImageRank && rank != kDefaultImageRank) {
+    std::string err_msg =
+      op_name + ": input tensor is not in shape of <H,W> or <H,W,C>, but got rank: " + std::to_string(rank);
+    if (rank == 1) {
+      err_msg = err_msg + ". You may need to perform Decode first.";
+    }
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+  return Status::OK();
+}
+
 bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel) {
  if (tensor == nullptr) {
    return false;
@ -1798,18 +1824,6 @@ Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::share
  }
 }

-Status ValidateImageRank(const std::string &op_name, int32_t rank) {
-  if (rank != kMinImageRank && rank != kDefaultImageRank) {
-    std::string err_msg =
-      op_name + ": input tensor is not in shape of <H,W> or <H,W,C>, but got rank: " + std::to_string(rank);
-    if (rank == 1) {
-      err_msg = err_msg + ". You may need to perform Decode first.";
-    }
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
-  return Status::OK();
-}
-
 Status ToTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
  try {
    std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
@ -83,6 +83,16 @@ Status ImageNumChannels(const std::shared_ptr<Tensor> &image, int *channels);
 /// \return The status code.
 Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size);

+/// \brief Validate image dtype.
+/// \param[in] op_name operator name.
+/// \param[in] dtype Data type of the image tensor.
+Status ValidateImageDtype(const std::string &op_name, DataType dtype);
+
+/// \brief Validate image rank.
+/// \param[in] op_name operator name.
+/// \param[in] rank refers to the rank of input image shape.
+Status ValidateImageRank(const std::string &op_name, int32_t rank);
+
 /// \brief Returns the check result of tensor rank and tensor shape
 /// \param[in] tensor: The input tensor need to check
 /// \param[in] channel: The channel index of tensor shape.
@ -408,11 +418,6 @@ Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
                        std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
                        SliceMode slice_mode);

-/// \brief Validate image rank.
-/// \param[in] op_name operator name.
-/// \param[in] rank refers to the rank of input image shape.
-Status ValidateImageRank(const std::string &op_name, int32_t rank);
-
 /// \brief Rescale and convert HWC to CHW format.
 /// \param[in] input The input image
 /// \param[in] data_type The output data type
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/pad_to_size_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_to_size_op.cc
@ -38,6 +38,7 @@ std::string SizeToString(const std::vector<T> &size) {

 Status PadToSizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
  IO_CHECK(input, output);
+  RETURN_IF_NOT_OK(ValidateImageDtype("PadToSize", input->type()));
  RETURN_IF_NOT_OK(ValidateImageRank("PadToSize", input->Rank()));
  std::vector<dsize_t> image_size;
  RETURN_IF_NOT_OK(ImageSize(input, &image_size));
--- a/mindspore/python/mindspore/dataset/vision/transforms.py
+++ b/mindspore/python/mindspore/dataset/vision/transforms.py
@ -1318,6 +1318,7 @@ class PadToSize(TensorOperation):
            self.offset = [offset, offset] if isinstance(offset, int) else offset
        self.fill_value = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value
        self.padding_mode = Border.to_c_type(padding_mode)
+        self.implementation = Implementation.C

    def parse(self):
        return cde.PadToSizeOperation(self.size, self.offset, self.fill_value, self.padding_mode)
--- a/tests/ut/python/dataset/test_pad_to_size.py
+++ b/tests/ut/python/dataset/test_pad_to_size.py
@ -24,7 +24,8 @@ import mindspore.dataset as ds
 import mindspore.dataset.vision as vision
 from mindspore.dataset.vision import Border, ConvertMode

-DATA_DIR_10 = "../data/dataset/testCifar10Data"
+IMAGE_DIR = "../data/dataset/testPK/data"
+CIFAR10_DIR = "../data/dataset/testCifar10Data"


 def test_pad_to_size_size():
@ -33,17 +34,19 @@ def test_pad_to_size_size():
    Description: Test parameter `size`
    Expectation: Output image shape is as expected
    """
-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
-    transforms = [vision.PadToSize(100)]
+    dataset = ds.ImageFolderDataset(IMAGE_DIR, num_samples=10)
+    transforms = [vision.Decode(to_pil=False),
+                  vision.PadToSize(5000)]
    dataset = dataset.map(operations=transforms, input_columns=["image"])
    for data in dataset.create_dict_iterator(num_epochs=1):
-        assert data["image"].shape == (100, 100, 3)
+        assert data["image"].shape == (5000, 5000, 3)

-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
-    transforms = [vision.PadToSize((52, 66))]
+    dataset = ds.ImageFolderDataset(IMAGE_DIR, num_samples=10)
+    transforms = [vision.Decode(to_pil=True),
+                  vision.PadToSize((2500, 4500))]
    dataset = dataset.map(operations=transforms, input_columns=["image"])
    for data in dataset.create_dict_iterator(num_epochs=1):
-        assert data["image"].shape == (52, 66, 3)
+        assert data["image"].shape == (2500, 4500, 3)


 def test_pad_to_size_offset():
@ -52,25 +55,25 @@ def test_pad_to_size_offset():
    Description: Test parameter `offset`
    Expectation: Output image shape is as expected
    """
-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    transforms = [vision.PadToSize((61, 57), None)]  # offset = None
    dataset = dataset.map(operations=transforms, input_columns=["image"])
    for data in dataset.create_dict_iterator(num_epochs=1):
        assert data["image"].shape == (61, 57, 3)

-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    transforms = [vision.PadToSize((61, 57), ())]  # offset is empty
    dataset = dataset.map(operations=transforms, input_columns=["image"])
    for data in dataset.create_dict_iterator(num_epochs=1):
        assert data["image"].shape == (61, 57, 3)

-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    transforms = [vision.PadToSize((61, 57), 5)]  # offset is int
    dataset = dataset.map(operations=transforms, input_columns=["image"])
    for data in dataset.create_dict_iterator(num_epochs=1):
        assert data["image"].shape == (61, 57, 3)

-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    transforms = [vision.PadToSize((61, 57), (3, 7))]  # offset is sequence
    dataset = dataset.map(operations=transforms, input_columns=["image"])
    for data in dataset.create_dict_iterator(num_epochs=1):
@ -100,7 +103,7 @@ def test_pad_to_size_grayscale():
    Description: Test on grayscale image
    Expectation: Output image shape is as expected
    """
-    dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    transforms = [vision.ConvertColor(ConvertMode.COLOR_RGB2GRAY),
                  vision.PadToSize(97)]
    dataset = dataset.map(operations=transforms, input_columns=["image"])
@ -116,13 +119,13 @@ def test_pad_to_size_vs_pad():
    """
    original_size = (32, 32)

-    dataset_pad_to_size = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset_pad_to_size = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    target_size = (50, 101)
    offset = (5, 13)
    transforms_pad_to_size = [vision.PadToSize(target_size, offset, fill_value=200, padding_mode=Border.CONSTANT)]
    dataset_pad_to_size = dataset_pad_to_size.map(operations=transforms_pad_to_size, input_columns=["image"])

-    dataset_pad = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
+    dataset_pad = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
    left = offset[1]
    top = offset[0]
    right = target_size[1] - original_size[1] - left
@ -175,7 +178,8 @@ def test_pad_to_size_check():
                       data=np.random.random((28, 28, 4)))
    test_invalid_input(RuntimeError, "input tensor is not in shape of <H,W> or <H,W,C>",
                       data=np.random.random(28))
-    test_invalid_input(RuntimeError, "load image failed", data=np.random.random((28, 28, 3)).astype(np.str))
+    test_invalid_input(RuntimeError, "Currently unsupported data type: [uint32, int64, uint64, string]",
+                       data=np.random.random((28, 28, 3)).astype(np.str))


 if __name__ == "__main__":