forked from mindspore-Ecosystem/mindspore
!15727 VOCDataset read xml will filter invalid bbox automatically
From: @xiefangqi Reviewed-by: @heleiwang,@jonyguo Signed-off-by: @jonyguo
This commit is contained in:
commit
b85bf60eb8
|
@ -174,10 +174,16 @@ Status ManifestOp::ParseManifestFile() {
|
|||
}
|
||||
std::string line;
|
||||
std::set<std::string> classes;
|
||||
uint64_t line_count = 1;
|
||||
while (getline(file_handle, line)) {
|
||||
try {
|
||||
nlohmann::json js = nlohmann::json::parse(line);
|
||||
std::string image_file_path = js.value("source", "");
|
||||
if (image_file_path == "") {
|
||||
file_handle.close();
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, source is not found in Manifest file: " + file_ + " at line " +
|
||||
std::to_string(line_count));
|
||||
}
|
||||
// If image is not JPEG/PNG/GIF/BMP, drop it
|
||||
bool valid = false;
|
||||
RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
|
||||
|
@ -185,6 +191,11 @@ Status ManifestOp::ParseManifestFile() {
|
|||
continue;
|
||||
}
|
||||
std::string usage = js.value("usage", "");
|
||||
if (usage == "") {
|
||||
file_handle.close();
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, usage is not found in Manifest file: " + file_ + " at line " +
|
||||
std::to_string(line_count));
|
||||
}
|
||||
(void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower);
|
||||
if (usage != usage_) {
|
||||
continue;
|
||||
|
@ -197,7 +208,8 @@ Status ManifestOp::ParseManifestFile() {
|
|||
classes.insert(label_name);
|
||||
if (label_name == "") {
|
||||
file_handle.close();
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + image_file_path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + file_ + " at line " +
|
||||
std::to_string(line_count));
|
||||
}
|
||||
if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) {
|
||||
if (label_index_.find(label_name) == label_index_.end()) {
|
||||
|
@ -209,6 +221,7 @@ Status ManifestOp::ParseManifestFile() {
|
|||
if (!labels.empty()) {
|
||||
image_labelname_.emplace_back(std::make_pair(image_file_path, labels));
|
||||
}
|
||||
line_count++;
|
||||
} catch (const std::exception &err) {
|
||||
file_handle.close();
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_);
|
||||
|
|
|
@ -213,6 +213,17 @@ void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value
|
|||
}
|
||||
}
|
||||
|
||||
// Validate the bndbox coordinates read from an annotation xml.
// The box is accepted only when xmin and ymin are strictly positive and the max corner lies
// strictly beyond the min corner on both axes; any other box yields an error naming the box
// and the xml file it came from.
// @param xmin - the left coordinate of bndbox
// @param ymin - the top coordinate of bndbox
// @param xmax - the right coordinate of bndbox
// @param ymax - the bottom coordinate of bndbox
// @param path - the file path of bndbox xml, reported in the error message
// @return Status The status code returned
Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
                               const std::string &path) {
  const bool min_corner_positive = xmin > 0 && ymin > 0;
  const bool max_beyond_min = xmax > xmin && ymax > ymin;
  if (!(min_corner_positive && max_beyond_min)) {
    // Render the box as "{xmin, ymin, xmax, ymax}" with each coordinate truncated to an int.
    const float coords[] = {xmin, ymin, xmax, ymax};
    std::string invalid_bbox = "{";
    const char *separator = "";
    for (const float coord : coords) {
      invalid_bbox += separator;
      invalid_bbox += std::to_string(static_cast<int>(coord));
      separator = ", ";
    }
    invalid_bbox += "}";
    RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path);
  }
  return Status::OK();
}
|
||||
|
||||
Status VOCOp::ParseAnnotationBbox(const std::string &path) {
|
||||
if (!Path(path).Exists()) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
|
||||
|
@ -245,6 +256,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
|
|||
ParseNodeValue(bbox_node, "xmax", &xmax);
|
||||
ParseNodeValue(bbox_node, "ymin", &ymin);
|
||||
ParseNodeValue(bbox_node, "ymax", &ymax);
|
||||
RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
|
||||
} else {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
|
||||
}
|
||||
|
|
|
@ -230,6 +230,15 @@ class VOCOp : public MappableLeafOp {
|
|||
// @return Status The status code returned
|
||||
Status ParseAnnotationBbox(const std::string &path);
|
||||
|
||||
// Check that a bndbox is valid: xmin and ymin must be greater than 0, xmax greater than xmin
// and ymax greater than ymin; otherwise an error status naming the box and the file is returned.
// @param xmin - the left coordinate of bndbox
|
||||
// @param ymin - the top coordinate of bndbox
|
||||
// @param xmax - the right coordinate of bndbox
|
||||
// @param ymax - the bottom coordinate of bndbox
|
||||
// @param path - the file path of bndbox xml
|
||||
// @return Status The status code returned
|
||||
Status CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
|
||||
const std::string &path);
|
||||
|
||||
// @param XMLElement *bbox_node - bbox node info found in xml object
|
||||
// @param const char *name - sub node name in object
|
||||
// @param float *value - value of certain sub node
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
|
@ -0,0 +1,6 @@
|
|||
{"source":"../data/dataset/testManifestData/train/1.JPEG","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/train/1.JPEG", "usage":"TRAIN","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"},{"type": "modelarts/image_classification","name": "flower","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/eval/1.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "cat","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
||||
{"source":"../data/dataset/testManifestData/eval/2.JPEG", "usage":"EVAL","id":"0162005993f8065ef47eefb59d1e4970","annotation": [{"type": "modelarts/image_classification","name": "dog","property": {"color":"white","kind":"Persian cat"},"hard":"true","hard-coefficient":0.8,"annotated-by":"human","creation-time":"2019-01-23 11:30:30"}],"inference-loc":"/path/to/inference-output"}
|
|
@ -24,18 +24,6 @@
|
|||
<ymax>242</ymax>
|
||||
</bndbox>
|
||||
</object>
|
||||
<object>
|
||||
<name>person</name>
|
||||
<pose>Left</pose>
|
||||
<truncated>0</truncated>
|
||||
<difficult>0</difficult>
|
||||
<bndbox>
|
||||
<xmin>425</xmin>
|
||||
<ymin>239</ymin>
|
||||
<xmax>445</xmax>
|
||||
<ymax>235</ymax>
|
||||
</bndbox>
|
||||
</object>
|
||||
<object>
|
||||
<name>person</name>
|
||||
<pose>Left</pose>
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
<annotation>
|
||||
<folder>VOC2012</folder>
|
||||
<filename>33.jpg</filename>
|
||||
<source>
|
||||
<database>simulate VOC2007 Database</database>
|
||||
<annotation>simulate VOC2007</annotation>
|
||||
<image>flickr</image>
|
||||
</source>
|
||||
<size>
|
||||
<width>500</width>
|
||||
<height>366</height>
|
||||
<depth>3</depth>
|
||||
</size>
|
||||
<segmented>1</segmented>
|
||||
<object>
|
||||
<name>person</name>
|
||||
<pose>Unspecified</pose>
|
||||
<truncated>0</truncated>
|
||||
<difficult>0</difficult>
|
||||
<bndbox>
|
||||
<xmin>8</xmin>
|
||||
<ymin>121</ymin>
|
||||
<xmax>471</xmax>
|
||||
<ymax>242</ymax>
|
||||
</bndbox>
|
||||
</object>
|
||||
<object>
|
||||
<name>person</name>
|
||||
<pose>Left</pose>
|
||||
<truncated>1</truncated>
|
||||
<difficult>0</difficult>
|
||||
<bndbox>
|
||||
<xmin>321</xmin>
|
||||
<ymin>121</ymin>
|
||||
<xmax>421</xmax>
|
||||
<ymax>120</ymax>
|
||||
</bndbox>
|
||||
</object>
|
||||
</annotation>
|
|
@ -0,0 +1 @@
|
|||
invalidbbox
|
|
@ -160,6 +160,24 @@ def test_manifest_dataset_exception():
|
|||
except RuntimeError as e:
|
||||
assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)
|
||||
|
||||
NO_SOURCE_DATA_FILE = "../data/dataset/testManifestData/invalidNoSource.manifest"
|
||||
try:
|
||||
data = ds.ManifestDataset(NO_SOURCE_DATA_FILE)
|
||||
for _ in data.__iter__():
|
||||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "Invalid data, source is not found in Manifest file" in str(e)
|
||||
|
||||
NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest"
|
||||
try:
|
||||
data = ds.ManifestDataset(NO_USAGE_DATA_FILE)
|
||||
for _ in data.__iter__():
|
||||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "Invalid data, usage is not found in Manifest file" in str(e)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_manifest_dataset_train()
|
||||
|
|
|
@ -181,6 +181,14 @@ def test_voc_exception():
|
|||
except RuntimeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
data7 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlinvalidbbox")
|
||||
for _ in data7.create_dict_iterator(num_epochs=1):
|
||||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "Invalid bndbox: {321, 121, 421, 120}" in str(e)
|
||||
|
||||
def exception_func(item):
|
||||
raise Exception("Error occur!")
|
||||
|
||||
|
|
Loading…
Reference in New Issue