!15727 VOCDataset read xml will filter invalid bbox automatically

From: @xiefangqi
Reviewed-by: @heleiwang,@jonyguo
Signed-off-by: @jonyguo
This commit is contained in:
mindspore-ci-bot 2021-05-07 09:50:53 +08:00 committed by Gitee
commit b85bf60eb8
10 changed files with 113 additions and 13 deletions

View File

@ -174,10 +174,16 @@ Status ManifestOp::ParseManifestFile() {
}
std::string line;
std::set<std::string> classes;
uint64_t line_count = 1;
while (getline(file_handle, line)) {
try {
nlohmann::json js = nlohmann::json::parse(line);
std::string image_file_path = js.value("source", "");
if (image_file_path == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, source is not found in Manifest file: " + file_ + " at line " +
std::to_string(line_count));
}
// If image is not JPEG/PNG/GIF/BMP, drop it
bool valid = false;
RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
@ -185,6 +191,11 @@ Status ManifestOp::ParseManifestFile() {
continue;
}
std::string usage = js.value("usage", "");
if (usage == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, usage is not found in Manifest file: " + file_ + " at line " +
std::to_string(line_count));
}
(void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower);
if (usage != usage_) {
continue;
@ -197,7 +208,8 @@ Status ManifestOp::ParseManifestFile() {
classes.insert(label_name);
if (label_name == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + image_file_path);
RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + file_ + " at line " +
std::to_string(line_count));
}
if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) {
if (label_index_.find(label_name) == label_index_.end()) {
@ -209,6 +221,7 @@ Status ManifestOp::ParseManifestFile() {
if (!labels.empty()) {
image_labelname_.emplace_back(std::make_pair(image_file_path, labels));
}
line_count++;
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_);

View File

@ -213,6 +213,17 @@ void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value
}
}
// Validate the coordinates of a bndbox read from an annotation xml file.
// The bndbox is accepted only when xmin and ymin are both greater than 0, xmax is greater than xmin and
// ymax is greater than ymin. Any other combination is reported through RETURN_STATUS_UNEXPECTED with a
// message that lists the coordinates (cast to int) and the xml path.
// @param xmin - the left coordinate of bndbox
// @param ymin - the top coordinate of bndbox
// @param xmax - the right coordinate of bndbox
// @param ymax - the bottom coordinate of bndbox
// @param path - the file path of bndbox xml
// @return Status The status code returned
Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
                               const std::string &path) {
  const bool origin_is_positive = xmin > 0 && ymin > 0;
  const bool extent_is_positive = xmax > xmin && ymax > ymin;
  if (!(origin_is_positive && extent_is_positive)) {
    // Render the offending box as "{xmin, ymin, xmax, ymax}" using integer-cast coordinates.
    std::string invalid_bbox = "{";
    invalid_bbox += std::to_string(static_cast<int>(xmin));
    invalid_bbox += ", ";
    invalid_bbox += std::to_string(static_cast<int>(ymin));
    invalid_bbox += ", ";
    invalid_bbox += std::to_string(static_cast<int>(xmax));
    invalid_bbox += ", ";
    invalid_bbox += std::to_string(static_cast<int>(ymax));
    invalid_bbox += "}";
    RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path);
  }
  return Status::OK();
}
Status VOCOp::ParseAnnotationBbox(const std::string &path) {
if (!Path(path).Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
@ -245,6 +256,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
ParseNodeValue(bbox_node, "xmax", &xmax);
ParseNodeValue(bbox_node, "ymin", &ymin);
ParseNodeValue(bbox_node, "ymax", &ymax);
RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
}

View File

@ -230,6 +230,15 @@ class VOCOp : public MappableLeafOp {
// @return Status The status code returned
Status ParseAnnotationBbox(const std::string &path);
// Check whether the coordinates of a bndbox parsed from an annotation xml are valid.
// The bndbox is treated as valid only when xmin > 0, ymin > 0, xmax > xmin and ymax > ymin;
// otherwise an error status naming the bndbox and the xml path is returned.
// @param xmin - the left coordinate of bndbox
// @param ymin - the top coordinate of bndbox
// @param xmax - the right coordinate of bndbox
// @param ymax - the bottom coordinate of bndbox
// @param path - the file path of bndbox xml
// @return Status The status code returned
Status CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
const std::string &path);
// @param XMLElement *bbox_node - bbox node info found in xml object
// @param const char *name - sub node name in object
// @param float *value - value of certain sub node

View File

@ -0,0 +1,6 @@
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}

View File

@ -0,0 +1,6 @@
{"source":"../data/dataset/testManifestData/train/1.JPEG","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}

View File

@ -24,18 +24,6 @@
<ymax>242</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>425</xmin>
<ymin>239</ymin>
<xmax>445</xmax>
<ymax>235</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>

View File

@ -0,0 +1,39 @@
<annotation>
<folder>VOC2012</folder>
<filename>33.jpg</filename>
<source>
<database>simulate VOC2007 Database</database>
<annotation>simulate VOC2007</annotation>
<image>flickr</image>
</source>
<size>
<width>500</width>
<height>366</height>
<depth>3</depth>
</size>
<segmented>1</segmented>
<object>
<name>person</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>8</xmin>
<ymin>121</ymin>
<xmax>471</xmax>
<ymax>242</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>321</xmin>
<ymin>121</ymin>
<xmax>421</xmax>
<ymax>120</ymax>
</bndbox>
</object>
</annotation>

View File

@ -0,0 +1 @@
invalidbbox

View File

@ -160,6 +160,24 @@ def test_manifest_dataset_exception():
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)
NO_SOURCE_DATA_FILE = "../data/dataset/testManifestData/invalidNoSource.manifest"
try:
data = ds.ManifestDataset(NO_SOURCE_DATA_FILE)
for _ in data.__iter__():
pass
assert False
except RuntimeError as e:
assert "Invalid data, source is not found in Manifest file" in str(e)
NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest"
try:
data = ds.ManifestDataset(NO_USAGE_DATA_FILE)
for _ in data.__iter__():
pass
assert False
except RuntimeError as e:
assert "Invalid data, usage is not found in Manifest file" in str(e)
if __name__ == '__main__':
test_manifest_dataset_train()

View File

@ -181,6 +181,14 @@ def test_voc_exception():
except RuntimeError:
pass
try:
data7 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlinvalidbbox")
for _ in data7.create_dict_iterator(num_epochs=1):
pass
assert False
except RuntimeError as e:
assert "Invalid bndbox: {321, 121, 421, 120}" in str(e)
def exception_func(item):
raise Exception("Error occur!")