From 458ff5fed9a75b402a9ab276a8b0a559f95404ea Mon Sep 17 00:00:00 2001 From: xiefangqi Date: Mon, 26 Apr 2021 19:49:46 +0800 Subject: [PATCH] md fix voc and manifest error scenario --- .../engine/datasetops/source/manifest_op.cc | 15 ++++++- .../engine/datasetops/source/voc_op.cc | 12 ++++++ .../dataset/engine/datasetops/source/voc_op.h | 9 +++++ .../testManifestData/invalidNoSource.manifest | 6 +++ .../testManifestData/invalidNoUsage.manifest | 6 +++ .../dataset/testVOC2012/Annotations/33.xml | 12 ------ .../testVOC2012/Annotations/invalidbbox.xml | 39 +++++++++++++++++++ .../ImageSets/Main/xmlinvalidbbox.txt | 1 + .../dataset/test_datasets_manifestop.py | 18 +++++++++ tests/ut/python/dataset/test_datasets_voc.py | 8 ++++ 10 files changed, 113 insertions(+), 13 deletions(-) create mode 100644 tests/ut/data/dataset/testManifestData/invalidNoSource.manifest create mode 100644 tests/ut/data/dataset/testManifestData/invalidNoUsage.manifest create mode 100644 tests/ut/data/dataset/testVOC2012/Annotations/invalidbbox.xml create mode 100644 tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlinvalidbbox.txt diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index 2ddbbe3f9f4..d6b7d447385 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -174,10 +174,16 @@ Status ManifestOp::ParseManifestFile() { } std::string line; std::set classes; + uint64_t line_count = 1; while (getline(file_handle, line)) { try { nlohmann::json js = nlohmann::json::parse(line); std::string image_file_path = js.value("source", ""); + if (image_file_path == "") { + file_handle.close(); + RETURN_STATUS_UNEXPECTED("Invalid data, source is not found in Manifest file: " + file_ + " at line " + + std::to_string(line_count)); + } // If image is not JPEG/PNG/GIF/BMP, drop it bool valid = false; RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid)); @@ -185,6 +191,11 @@ Status ManifestOp::ParseManifestFile() { continue; } std::string usage = js.value("usage", ""); + if (usage == "") { + file_handle.close(); + RETURN_STATUS_UNEXPECTED("Invalid data, usage is not found in Manifest file: " + file_ + " at line " + + std::to_string(line_count)); + } (void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower); if (usage != usage_) { continue; @@ -197,7 +208,8 @@ Status ManifestOp::ParseManifestFile() { classes.insert(label_name); if (label_name == "") { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + image_file_path); + RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + file_ + " at line " + + std::to_string(line_count)); } if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) { if (label_index_.find(label_name) == label_index_.end()) { @@ -209,6 +221,7 @@ Status ManifestOp::ParseManifestFile() { if (!labels.empty()) { image_labelname_.emplace_back(std::make_pair(image_file_path, labels)); } + line_count++; } catch (const std::exception &err) { file_handle.close(); RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index cbc005a77a1..8de936177d9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -213,6 +213,17 @@ void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value } } +Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax, + const std::string &path) { + if (!(xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin)) { + std::string invalid_bbox = "{" + std::to_string(static_cast(xmin)) + ", " + + std::to_string(static_cast(ymin)) + ", " + std::to_string(static_cast(xmax)) + + ", " + std::to_string(static_cast(ymax)) + "}"; + RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path); + } + return Status::OK(); +} + Status VOCOp::ParseAnnotationBbox(const std::string &path) { if (!Path(path).Exists()) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path); @@ -245,6 +256,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) { ParseNodeValue(bbox_node, "xmax", &xmax); ParseNodeValue(bbox_node, "ymin", &ymin); ParseNodeValue(bbox_node, "ymax", &ymax); + RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h index ed16462c211..60908ea1e6c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h @@ -230,6 +230,15 @@ class VOCOp : public MappableLeafOp { // @return Status The status code returned Status ParseAnnotationBbox(const std::string &path); + // @param xmin - the left coordinate of bndbox + // @param ymin - the top coordinate of bndbox + // @param xmax - the right coordinate of bndbox + // @param ymax - the bottom coordinate of bndbox + // @param path - the file path of bndbox xml + // @return Status The status code returned + Status CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax, + const std::string &path); + // @param XMLElement *bbox_node - bbox node info found in json object // @param const char *name - sub node name in object // @param float *value - value of certain sub node diff --git a/tests/ut/data/dataset/testManifestData/invalidNoSource.manifest b/tests/ut/data/dataset/testManifestData/invalidNoSource.manifest new file mode 100644 index 00000000000..a6ac55c4c0e --- /dev/null +++ b/tests/ut/data/dataset/testManifestData/invalidNoSource.manifest @@ -0,0 +1,6 @@ +{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} diff --git a/tests/ut/data/dataset/testManifestData/invalidNoUsage.manifest b/tests/ut/data/dataset/testManifestData/invalidNoUsage.manifest new file mode 100644 index 00000000000..dc0b4e76851 --- /dev/null +++ b/tests/ut/data/dataset/testManifestData/invalidNoUsage.manifest @@ -0,0 +1,6 @@ +{"source":"../data/dataset/testManifestData/train/1.JPEG","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} +{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"} diff --git a/tests/ut/data/dataset/testVOC2012/Annotations/33.xml b/tests/ut/data/dataset/testVOC2012/Annotations/33.xml index 240dcfdb89f..f30ce702c9d 100644 --- a/tests/ut/data/dataset/testVOC2012/Annotations/33.xml +++ b/tests/ut/data/dataset/testVOC2012/Annotations/33.xml @@ -24,18 +24,6 @@ 242 - - person - Left - 0 - 0 - - 425 - 239 - 445 - 235 - - person Left diff --git a/tests/ut/data/dataset/testVOC2012/Annotations/invalidbbox.xml b/tests/ut/data/dataset/testVOC2012/Annotations/invalidbbox.xml new file mode 100644 index 00000000000..96d89c2731a --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012/Annotations/invalidbbox.xml @@ -0,0 +1,39 @@ + + VOC2012 + 33.jpg + + simulate VOC2007 Database + simulate VOC2007 + flickr + + + 500 + 366 + 3 + + 1 + + person + Unspecified + 0 + 0 + + 8 + 121 + 471 + 242 + + + + person + Left + 1 + 0 + + 321 + 121 + 421 + 120 + + + diff --git a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlinvalidbbox.txt b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlinvalidbbox.txt new file mode 100644 index 00000000000..e575aff437d --- /dev/null +++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlinvalidbbox.txt @@ -0,0 +1 @@ +invalidbbox diff --git a/tests/ut/python/dataset/test_datasets_manifestop.py b/tests/ut/python/dataset/test_datasets_manifestop.py index 53f5a5c6994..644c2a00ee3 100644 --- a/tests/ut/python/dataset/test_datasets_manifestop.py +++ b/tests/ut/python/dataset/test_datasets_manifestop.py @@ -160,6 +160,24 @@ def test_manifest_dataset_exception(): except RuntimeError as e: assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) + NO_SOURCE_DATA_FILE = "../data/dataset/testManifestData/invalidNoSource.manifest" + try: + data = ds.ManifestDataset(NO_SOURCE_DATA_FILE) + for _ in data.__iter__(): + pass + assert False + except RuntimeError as e: + assert "Invalid data, source is not found in Manifest file" in str(e) + + NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest" + try: + data = ds.ManifestDataset(NO_USAGE_DATA_FILE) + for _ in data.__iter__(): + pass + assert False + except RuntimeError as e: + assert "Invalid data, usage is not found in Manifest file" in str(e) + if __name__ == '__main__': test_manifest_dataset_train() diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index c03ea544cd2..1d224620b47 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -181,6 +181,14 @@ def test_voc_exception(): except RuntimeError: pass + try: + data7 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlinvalidbbox") + for _ in data7.create_dict_iterator(num_epochs=1): + pass + assert False + except RuntimeError as e: + assert "Invalid bndbox: {321, 121, 421, 120}" in str(e) + def exception_func(item): raise Exception("Error occur!")