From f180e1f9d218d575f0201248e43f10fad5e9025a Mon Sep 17 00:00:00 2001 From: Xiao Tianci Date: Fri, 22 Jul 2022 14:33:03 +0800 Subject: [PATCH] add dtype validation for RandomCropWithBbox --- .../ccsrc/minddata/dataset/core/tensor.cc | 27 ++++++++++++------- .../dataset/kernels/image/image_utils.cc | 26 +++++++++--------- .../dataset/kernels/image/pad_to_size_op.cc | 3 +-- .../dataset/kernels/image/random_crop_op.cc | 6 ++--- .../minddata/dataset/kernels/py_func_op.cc | 21 +++++++++------ .../mindspore/dataset/engine/datasets.py | 9 ------- tests/ut/python/dataset/test_pad_to_size.py | 7 ++--- tests/ut/python/dataset/test_random_crop.py | 2 +- 8 files changed, 53 insertions(+), 48 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc index 6c98007c6b5..e7137592b20 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -92,19 +92,22 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept { } Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) { - CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); - CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); + CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Failed to create empty tensor, tensor shape is unknown."); + CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Failed to create empty tensor, data type is unknown."); RETURN_UNEXPECTED_IF_NULL(out); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); - CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Failed to create empty tensor, allocate memory failed."); // if it's a string tensor and it has no elements, Just initialize the shape and type. - if (!type.IsNumeric() && shape.NumOfElements() == 0) { - return Status::OK(); + if (!type.IsNumeric()) { + if (shape.NumOfElements() == 0) { + return Status::OK(); + } else { + RETURN_STATUS_UNEXPECTED( + "Failed to create empty tensor, number of elements should be 0 when data type is string."); + } } - CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric."); - int64_t byte_size = (*out)->SizeInBytes(); // Don't allocate if we have a tensor with no elements. @@ -197,7 +200,11 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr *out) { Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr *out) { RETURN_UNEXPECTED_IF_NULL(out); - if (DataType::FromNpArray(arr) == DataType::DE_STRING) { + DataType type = DataType::FromNpArray(arr); + CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, + "Failed to create tensor from numpy array, data type is unknown."); + + if (type == DataType::DE_STRING) { return CreateFromNpString(arr, out); } @@ -221,10 +228,10 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr * unsigned char *data = static_cast(arr.request().ptr); if (is_strided) { - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(shape), DataType::FromNpArray(arr), out)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(shape), type, out)); RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes())); } else { - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape(shape), DataType::FromNpArray(arr), data, out)); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape(shape), type, data, out)); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc index 6a92bc99ec5..2b85aa7174a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc @@ -142,7 +142,17 @@ Status ValidateImage(const std::shared_ptr &image, const std::string &op if (valid_rank.find(rank) == valid_rank.end()) { std::string err_msg = op_name + ": the dimension of image tensor does not match the requirement of operator."; err_msg += " Expecting tensor in dimension of " + NumberSetToString(valid_rank); + if (valid_rank == std::set({kMinImageRank, kDefaultImageRank})) { + err_msg += ", in shape of or "; + } else if (valid_rank == std::set({kMinImageRank})) { + err_msg += ", in shape of "; + } else if (valid_rank == std::set({kDefaultImageRank})) { + err_msg += ", in shape of "; + } err_msg += ". But got dimension " + std::to_string(rank) + "."; + if (rank == 1) { + err_msg += " You may need to perform Decode first."; + } RETURN_STATUS_UNEXPECTED(err_msg); } } @@ -164,8 +174,8 @@ Status ValidateImageDtype(const std::string &op_name, DataType dtype) { uint8_t type = dtype.AsCVType(); if (type == kCVInvalidType) { std::string type_name = "unknown"; - if (type < DataType::NUM_OF_TYPES) { - type_name = std::string(DataType::kTypeInfo[type].name_); + if (dtype.value() < DataType::NUM_OF_TYPES) { + type_name = std::string(DataType::kTypeInfo[dtype.value()].name_); } std::string err_msg = op_name + ": Cannot convert [" + type_name + "] to OpenCV type." + " Currently unsupported data type: [uint32, int64, uint64, string]"; @@ -1532,6 +1542,8 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) { try { + RETURN_IF_NOT_OK(ValidateImage(input, "Pad", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}, {1, 3})); + // input image std::shared_ptr input_cv = CVTensor::AsCVTensor(input); @@ -1539,16 +1551,6 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pad: load image failed."); } - // validate rank and number channels - RETURN_IF_NOT_OK(ValidateImageRank("Pad", input_cv->Rank())); - if (input_cv->Rank() == kDefaultImageRank) { - if (input_cv->shape()[kChannelIndexHWC] != kDefaultImageChannel && - input_cv->shape()[kChannelIndexHWC] != kMinImageChannel) { - RETURN_STATUS_UNEXPECTED("Pad: number of channels for input tensor can only be 1 or 3, got channel: " + - std::to_string(input_cv->shape()[kChannelIndexHWC])); - } - } - // get the border type in openCV auto b_type = GetCVBorderType(border_types); // output image diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/pad_to_size_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_to_size_op.cc index ec6fbfca20d..3b53e8e81e0 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/pad_to_size_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/pad_to_size_op.cc @@ -38,8 +38,7 @@ std::string SizeToString(const std::vector &size) { Status PadToSizeOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - RETURN_IF_NOT_OK(ValidateImageDtype("PadToSize", input->type())); - RETURN_IF_NOT_OK(ValidateImageRank("PadToSize", input->Rank())); + RETURN_IF_NOT_OK(ValidateImage(input, "PadToSize", {1, 2, 3, 4, 5, 6, 10, 11, 12}, {2, 3}, {1, 3})); std::vector image_size; RETURN_IF_NOT_OK(ImageSize(input, &image_size)); CHECK_FAIL_RETURN_SYNTAX_ERROR( diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc index dc883de4133..f2e20c936f2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc @@ -62,7 +62,7 @@ Status RandomCropOp::ImagePadding(const std::shared_ptr &input, std::sha CHECK_FAIL_RETURN_UNEXPECTED( pad_top_ < input->shape()[0] * max_ratio && pad_bottom_ < input->shape()[0] * max_ratio && pad_left_ < input->shape()[1] * max_ratio && pad_right_ < input->shape()[1] * max_ratio, - "Pad: padding size is three times bigger than the image size, padding top: " + std::to_string(pad_top_) + + "RandomCrop: padding size is three times bigger than the image size, padding top: " + std::to_string(pad_top_) + ", padding bottom: " + std::to_string(pad_bottom_) + ", padding pad_left_: " + std::to_string(pad_left_) + ", padding padding right:" + std::to_string(pad_right_) + ", image shape: " + std::to_string(input->shape()[0]) + ", " + std::to_string(input->shape()[1])); @@ -125,12 +125,12 @@ Status RandomCropOp::Compute(const TensorRow &input, TensorRow *output) { for (size_t i = 0; i < input.size() - 1; i++) { if (input[i]->Rank() != 2 && input[i]->Rank() != 3) { std::string err_msg = - "RandomCropOp: image shape is not or , but got rank:" + std::to_string(input[i]->Rank()); + "RandomCrop: image shape is not or , but got rank:" + std::to_string(input[i]->Rank()); RETURN_STATUS_UNEXPECTED(err_msg); } if (input[i]->shape()[0] != input[i + 1]->shape()[0] || input[i]->shape()[1] != input[i + 1]->shape()[1]) { RETURN_STATUS_UNEXPECTED( - "RandomCropOp: Input images in different column must have the same shape, check the output shape in " + "RandomCrop: Input images in different column must have the same shape, check the output shape in " "specified 'input_columns' before call this operation."); } } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc index 28a440d9b32..26cd5da3b53 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/py_func_op.cc @@ -26,6 +26,16 @@ namespace mindspore { namespace dataset { +Status ConvertNumpyToTensor(const py::object &py_obj, TensorRow *output) { + std::shared_ptr out; + // Python object like bool, int, float, list or tuple can also be converted + // to a NumPy array by the following cast, but the data type will be unknown + // if it is not a valid NumPy object + RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(py_obj.cast(), &out)); + output->push_back(out); + return Status::OK(); +} + Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); Status ret = Status(StatusCode::kSuccess, "PyFunc Call Succeed"); @@ -57,7 +67,7 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) { } else { if (py::isinstance(ret_py_obj)) { // In case of a n-m mapping, the return value will be a tuple of numpy arrays - py::tuple ret_py_tuple = ret_py_obj.cast(); + auto ret_py_tuple = ret_py_obj.cast(); // Iterate over two containers simultaneously for memory copy for (size_t i = 0; i < ret_py_tuple.size(); i++) { py::object ret_py_ele = ret_py_tuple[i]; @@ -67,16 +77,11 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) { "True, PyFunc may execute time out."; goto TimeoutError; } - - std::shared_ptr out; - RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast(), &out)); - output->push_back(out); + RETURN_IF_NOT_OK(ConvertNumpyToTensor(ret_py_ele, output)); } } else { // In case of a n-1 mapping, the return value will be a numpy array - std::shared_ptr out; - RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_obj.cast(), &out)); - output->push_back(out); + RETURN_IF_NOT_OK(ConvertNumpyToTensor(ret_py_obj, output)); } } } catch (const py::error_already_set &e) { diff --git a/mindspore/python/mindspore/dataset/engine/datasets.py b/mindspore/python/mindspore/dataset/engine/datasets.py index 3d44a1cb49c..226a715fd33 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets.py +++ b/mindspore/python/mindspore/dataset/engine/datasets.py @@ -2746,14 +2746,6 @@ class _PythonCallable: if result is None: # Invoke original Python callable in master process in case the pool is gone. result = self.py_callable(*args) - if isinstance(result, tuple): - result_tmp = [] - for r in result: - r = np.array(r) if not isinstance(r, np.ndarray) else r - result_tmp.append(r) - result = tuple(result_tmp) - else: - result = np.array(result) if not isinstance(result, np.ndarray) else result return result def to_json(self): @@ -2828,7 +2820,6 @@ def _worker_loop(operations, pipe): """ signal.signal(signal.SIGINT, signal.SIG_IGN) - while not _main_process_already_exit(): _ignore_sigint() diff --git a/tests/ut/python/dataset/test_pad_to_size.py b/tests/ut/python/dataset/test_pad_to_size.py index 8390f65c879..f8b839883b7 100644 --- a/tests/ut/python/dataset/test_pad_to_size.py +++ b/tests/ut/python/dataset/test_pad_to_size.py @@ -174,11 +174,12 @@ def test_pad_to_size_check(): test_invalid_input(RuntimeError, "target size to pad should be no less than the original image size", size=(5, 5)) test_invalid_input(RuntimeError, "sum of offset and original image size should be no more than the target size", (30, 30), (5, 5)) - test_invalid_input(RuntimeError, "number of channels for input tensor can only be 1 or 3", + test_invalid_input(RuntimeError, "Expecting tensor in channel of (1, 3)", data=np.random.random((28, 28, 4))) - test_invalid_input(RuntimeError, "input tensor is not in shape of or ", + test_invalid_input(RuntimeError, "Expecting tensor in dimension of (2, 3)", data=np.random.random(28)) - test_invalid_input(RuntimeError, "Currently unsupported data type: [uint32, int64, uint64, string]", + test_invalid_input(RuntimeError, "Expecting tensor in type of " + "(bool, int8, uint8, int16, uint16, int32, float16, float32, float64)", data=np.random.random((28, 28, 3)).astype(np.str)) diff --git a/tests/ut/python/dataset/test_random_crop.py b/tests/ut/python/dataset/test_random_crop.py index 198aa3a3abf..c8a0d321adc 100644 --- a/tests/ut/python/dataset/test_random_crop.py +++ b/tests/ut/python/dataset/test_random_crop.py @@ -546,7 +546,7 @@ def test_random_crop_09(): with pytest.raises(RuntimeError) as error_info: for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): pass - error_msg = "number of channels for input tensor can only be 1 or 3" + error_msg = "Expecting tensor in channel of (1, 3)" assert error_msg in str(error_info.value)