fix PadToSize with Decode

This commit is contained in:
Xiao Tianci 2022-06-20 14:36:36 +08:00
parent 4492406797
commit bf7fc7064d
5 changed files with 57 additions and 32 deletions

View File

@ -123,6 +123,32 @@ Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *siz
return Status::OK();
}
/// \brief Validate that the data type of an image tensor can be converted to an OpenCV type.
/// \param[in] op_name Operator name, used as the prefix of the error message.
/// \param[in] dtype Data type of the image tensor.
/// \return Status::OK() if dtype.AsCVType() is a valid OpenCV type, otherwise an unexpected-error status.
Status ValidateImageDtype(const std::string &op_name, DataType dtype) {
  if (dtype.AsCVType() == kCVInvalidType) {
    // Report the name of the tensor's own data type. On this path the OpenCV type code is always
    // kCVInvalidType, so it must not be used to look up a name in DataType::kTypeInfo.
    std::string err_msg = op_name + ": Cannot convert [" + dtype.ToString() + "] to OpenCV type." +
                          " Currently unsupported data type: [uint32, int64, uint64, string]";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  return Status::OK();
}
/// \brief Check that the rank of an input image tensor is one of the accepted image ranks.
/// \param[in] op_name Operator name, used as the prefix of the error message.
/// \param[in] rank Rank of the input image shape.
/// \return Status::OK() if rank equals kMinImageRank or kDefaultImageRank, otherwise an unexpected-error status.
Status ValidateImageRank(const std::string &op_name, int32_t rank) {
  const bool is_image_rank = (rank == kMinImageRank) || (rank == kDefaultImageRank);
  if (!is_image_rank) {
    std::string message = op_name + ": input tensor is not in shape of <H,W> or <H,W,C>, but got rank: ";
    message += std::to_string(rank);
    // A rank-1 input gets an extra hint suggesting that Decode be applied first.
    if (rank == 1) {
      message += ". You may need to perform Decode first.";
    }
    RETURN_STATUS_UNEXPECTED(message);
  }
  return Status::OK();
}
bool CheckTensorShape(const std::shared_ptr<Tensor> &tensor, const int &channel) {
if (tensor == nullptr) {
return false;
@ -1798,18 +1824,6 @@ Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::share
}
}
/// \brief Check that the rank of an input image tensor is one of the accepted image ranks.
/// \param[in] op_name Operator name, used as the prefix of the error message.
/// \param[in] rank Rank of the input image shape.
/// \return Status::OK() if rank equals kMinImageRank or kDefaultImageRank, otherwise an unexpected-error status.
Status ValidateImageRank(const std::string &op_name, int32_t rank) {
  const bool is_image_rank = (rank == kMinImageRank) || (rank == kDefaultImageRank);
  if (!is_image_rank) {
    std::string message = op_name + ": input tensor is not in shape of <H,W> or <H,W,C>, but got rank: ";
    message += std::to_string(rank);
    // A rank-1 input gets an extra hint suggesting that Decode be applied first.
    if (rank == 1) {
      message += ". You may need to perform Decode first.";
    }
    RETURN_STATUS_UNEXPECTED(message);
  }
  return Status::OK();
}
Status ToTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
try {
std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);

View File

@ -83,6 +83,16 @@ Status ImageNumChannels(const std::shared_ptr<Tensor> &image, int *channels);
/// \return The status code.
Status ImageSize(const std::shared_ptr<Tensor> &image, std::vector<dsize_t> *size);
/// \brief Validate image dtype.
/// \param[in] op_name operator name.
/// \param[in] dtype Data type of the image tensor.
/// \return The status code.
Status ValidateImageDtype(const std::string &op_name, DataType dtype);
/// \brief Validate image rank.
/// \param[in] op_name operator name.
/// \param[in] rank refers to the rank of input image shape.
/// \return The status code.
Status ValidateImageRank(const std::string &op_name, int32_t rank);
/// \brief Returns the check result of tensor rank and tensor shape
/// \param[in] tensor: The input tensor to check.
/// \param[in] channel: The channel index of tensor shape.
@ -408,11 +418,6 @@ Status ComputePatchSize(const std::shared_ptr<CVTensor> &input_cv,
std::shared_ptr<std::pair<int32_t, int32_t>> *patch_size, int32_t num_height, int32_t num_width,
SliceMode slice_mode);
/// \brief Validate image rank.
/// \param[in] op_name operator name.
/// \param[in] rank refers to the rank of input image shape.
/// \return The status code.
Status ValidateImageRank(const std::string &op_name, int32_t rank);
/// \brief Rescale and convert HWC to CHW format.
/// \param[in] input The input image
/// \param[in] data_type The output data type

View File

@ -38,6 +38,7 @@ std::string SizeToString(const std::vector<T> &size) {
Status PadToSizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
RETURN_IF_NOT_OK(ValidateImageDtype("PadToSize", input->type()));
RETURN_IF_NOT_OK(ValidateImageRank("PadToSize", input->Rank()));
std::vector<dsize_t> image_size;
RETURN_IF_NOT_OK(ImageSize(input, &image_size));

View File

@ -1318,6 +1318,7 @@ class PadToSize(TensorOperation):
self.offset = [offset, offset] if isinstance(offset, int) else offset
self.fill_value = tuple([fill_value] * 3) if isinstance(fill_value, int) else fill_value
self.padding_mode = Border.to_c_type(padding_mode)
self.implementation = Implementation.C
def parse(self):
    """Create the `cde.PadToSizeOperation` from the stored size, offset, fill value and padding mode."""
    operation_args = (self.size, self.offset, self.fill_value, self.padding_mode)
    return cde.PadToSizeOperation(*operation_args)

View File

@ -24,7 +24,8 @@ import mindspore.dataset as ds
import mindspore.dataset.vision as vision
from mindspore.dataset.vision import Border, ConvertMode
DATA_DIR_10 = "../data/dataset/testCifar10Data"
IMAGE_DIR = "../data/dataset/testPK/data"
CIFAR10_DIR = "../data/dataset/testCifar10Data"
def test_pad_to_size_size():
@ -33,17 +34,19 @@ def test_pad_to_size_size():
Description: Test parameter `size`
Expectation: Output image shape is as expected
"""
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
transforms = [vision.PadToSize(100)]
dataset = ds.ImageFolderDataset(IMAGE_DIR, num_samples=10)
transforms = [vision.Decode(to_pil=False),
vision.PadToSize(5000)]
dataset = dataset.map(operations=transforms, input_columns=["image"])
for data in dataset.create_dict_iterator(num_epochs=1):
assert data["image"].shape == (100, 100, 3)
assert data["image"].shape == (5000, 5000, 3)
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
transforms = [vision.PadToSize((52, 66))]
dataset = ds.ImageFolderDataset(IMAGE_DIR, num_samples=10)
transforms = [vision.Decode(to_pil=True),
vision.PadToSize((2500, 4500))]
dataset = dataset.map(operations=transforms, input_columns=["image"])
for data in dataset.create_dict_iterator(num_epochs=1):
assert data["image"].shape == (52, 66, 3)
assert data["image"].shape == (2500, 4500, 3)
def test_pad_to_size_offset():
@ -52,25 +55,25 @@ def test_pad_to_size_offset():
Description: Test parameter `offset`
Expectation: Output image shape is as expected
"""
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
transforms = [vision.PadToSize((61, 57), None)] # offset = None
dataset = dataset.map(operations=transforms, input_columns=["image"])
for data in dataset.create_dict_iterator(num_epochs=1):
assert data["image"].shape == (61, 57, 3)
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
transforms = [vision.PadToSize((61, 57), ())] # offset is empty
dataset = dataset.map(operations=transforms, input_columns=["image"])
for data in dataset.create_dict_iterator(num_epochs=1):
assert data["image"].shape == (61, 57, 3)
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
transforms = [vision.PadToSize((61, 57), 5)] # offset is int
dataset = dataset.map(operations=transforms, input_columns=["image"])
for data in dataset.create_dict_iterator(num_epochs=1):
assert data["image"].shape == (61, 57, 3)
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
transforms = [vision.PadToSize((61, 57), (3, 7))] # offset is sequence
dataset = dataset.map(operations=transforms, input_columns=["image"])
for data in dataset.create_dict_iterator(num_epochs=1):
@ -100,7 +103,7 @@ def test_pad_to_size_grayscale():
Description: Test on grayscale image
Expectation: Output image shape is as expected
"""
dataset = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
transforms = [vision.ConvertColor(ConvertMode.COLOR_RGB2GRAY),
vision.PadToSize(97)]
dataset = dataset.map(operations=transforms, input_columns=["image"])
@ -116,13 +119,13 @@ def test_pad_to_size_vs_pad():
"""
original_size = (32, 32)
dataset_pad_to_size = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset_pad_to_size = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
target_size = (50, 101)
offset = (5, 13)
transforms_pad_to_size = [vision.PadToSize(target_size, offset, fill_value=200, padding_mode=Border.CONSTANT)]
dataset_pad_to_size = dataset_pad_to_size.map(operations=transforms_pad_to_size, input_columns=["image"])
dataset_pad = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
dataset_pad = ds.Cifar10Dataset(CIFAR10_DIR, num_samples=10, shuffle=False)
left = offset[1]
top = offset[0]
right = target_size[1] - original_size[1] - left
@ -175,7 +178,8 @@ def test_pad_to_size_check():
data=np.random.random((28, 28, 4)))
test_invalid_input(RuntimeError, "input tensor is not in shape of <H,W> or <H,W,C>",
data=np.random.random(28))
test_invalid_input(RuntimeError, "load image failed", data=np.random.random((28, 28, 3)).astype(np.str))
test_invalid_input(RuntimeError, "Currently unsupported data type: [uint32, int64, uint64, string]",
data=np.random.random((28, 28, 3)).astype(np.str))
if __name__ == "__main__":