!15638 VOCDataset needs to add img_id for evaluation

From: @xiefangqi
Reviewed-by: @liucunwei
Signed-off-by: @liucunwei
mindspore-ci-bot 2021-05-17 19:33:24 +08:00 committed by Gitee
commit 601322356b
18 changed files with 257 additions and 74 deletions


@ -1121,28 +1121,29 @@ TextFileDataset::TextFileDataset(const std::vector<std::vector<char>> &dataset_f
VOCDataset::VOCDataset(const std::vector<char> &dataset_dir, const std::vector<char> &task,
const std::vector<char> &usage, const std::map<std::vector<char>, int32_t> &class_indexing,
bool decode, const std::shared_ptr<Sampler> &sampler,
const std::shared_ptr<DatasetCache> &cache) {
bool decode, const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache,
bool extra_metadata) {
auto sampler_obj = sampler ? sampler->Parse() : nullptr;
auto ds = std::make_shared<VOCNode>(CharToString(dataset_dir), CharToString(task), CharToString(usage),
MapCharToString(class_indexing), decode, sampler_obj, cache);
MapCharToString(class_indexing), decode, sampler_obj, cache, extra_metadata);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
VOCDataset::VOCDataset(const std::vector<char> &dataset_dir, const std::vector<char> &task,
const std::vector<char> &usage, const std::map<std::vector<char>, int32_t> &class_indexing,
bool decode, const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache) {
bool decode, const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache,
bool extra_metadata) {
auto sampler_obj = sampler ? sampler->Parse() : nullptr;
auto ds = std::make_shared<VOCNode>(CharToString(dataset_dir), CharToString(task), CharToString(usage),
MapCharToString(class_indexing), decode, sampler_obj, cache);
MapCharToString(class_indexing), decode, sampler_obj, cache, extra_metadata);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
VOCDataset::VOCDataset(const std::vector<char> &dataset_dir, const std::vector<char> &task,
const std::vector<char> &usage, const std::map<std::vector<char>, int32_t> &class_indexing,
bool decode, const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache) {
const std::shared_ptr<DatasetCache> &cache, bool extra_metadata) {
auto sampler_obj = sampler.get().Parse();
auto ds = std::make_shared<VOCNode>(CharToString(dataset_dir), CharToString(task), CharToString(usage),
MapCharToString(class_indexing), decode, sampler_obj, cache);
MapCharToString(class_indexing), decode, sampler_obj, cache, extra_metadata);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
} // namespace dataset
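
All three constructor overloads now thread the new extra_metadata flag through to VOCNode. At the Python level (see the API change further down in this diff) the flag surfaces as a keyword argument; a minimal usage sketch, with the test dataset path taken from the new tests below:

import mindspore.dataset as ds

# extra_metadata=True makes VOCNode append the "_meta-filename" column to the schema.
data = ds.VOCDataset("../data/dataset/testVOC2012", task="Segmentation", usage="train",
                     decode=True, extra_metadata=True)
print(data.get_col_names())  # ['image', 'target', '_meta-filename']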


@ -264,9 +264,10 @@ PYBIND_REGISTER(TFRecordNode, 2, ([](const py::module *m) {
PYBIND_REGISTER(VOCNode, 2, ([](const py::module *m) {
(void)py::class_<VOCNode, DatasetNode, std::shared_ptr<VOCNode>>(*m, "VOCNode", "to create a VOCNode")
.def(py::init([](std::string dataset_dir, std::string task, std::string usage,
py::dict class_indexing, bool decode, py::handle sampler) {
std::shared_ptr<VOCNode> voc = std::make_shared<VOCNode>(
dataset_dir, task, usage, toStringMap(class_indexing), decode, toSamplerObj(sampler), nullptr);
py::dict class_indexing, bool decode, py::handle sampler, bool extra_metadata) {
std::shared_ptr<VOCNode> voc =
std::make_shared<VOCNode>(dataset_dir, task, usage, toStringMap(class_indexing), decode,
toSamplerObj(sampler), nullptr, extra_metadata);
THROW_IF_ERROR(voc->ValidateParams());
return voc;
}));


@ -58,7 +58,27 @@ Status IteratorConsumer::GetNextAsVector(std::vector<TensorPtr> *out) {
// Return empty vector if there's no data
RETURN_OK_IF_TRUE(res.empty());
std::copy(res.begin(), res.end(), std::back_inserter(*out));
// Filter meta column
std::vector<size_t> to_keep_indices;
for (const auto &colMap : tree_adapter_->GetColumnNameMap()) {
std::string column_name = colMap.first;
// Filter out meta columns whose names start with kDftMetaColumnPrefix
size_t pos = column_name.find(kDftMetaColumnPrefix);
if (pos != std::string::npos && pos == 0) {
continue;
}
to_keep_indices.push_back(colMap.second);
}
if (to_keep_indices.size() == 0) {
std::string err_msg = "No effective column found; maybe all columns are meta columns and have been filtered out. ";
err_msg += "If you want to output a meta column, please rename it to a new name that does not start with ";
err_msg += "\"" + std::string(kDftMetaColumnPrefix) + "\"";
RETURN_STATUS_UNEXPECTED(err_msg);
}
std::sort(to_keep_indices.begin(), to_keep_indices.end());
(void)std::transform(to_keep_indices.begin(), to_keep_indices.end(), std::back_inserter(*out),
[&res](const auto &it) { return std::move(res[it]); });
return Status::OK();
}
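
The same prefix filter recurs in GetNextAsMap, DatasetIterator::GetNextAsMap, and DeviceQueueOp::FilterMetadata below; a minimal Python sketch of the shared logic (the names here are illustrative, not part of the change):

KDFT_META_COLUMN_PREFIX = "_meta-"

def filter_meta_columns(column_name_id_map, row):
    # Keep indices of columns whose names do not start with the meta prefix,
    # then emit the surviving tensors in ascending column-id order.
    keep = sorted(col_id for name, col_id in column_name_id_map.items()
                  if not name.startswith(KDFT_META_COLUMN_PREFIX))
    if not keep:
        raise RuntimeError("No effective column found; maybe all columns are meta columns")
    return [row[i] for i in keep]

# e.g. filter_meta_columns({"image": 0, "_meta-filename": 1}, [img, fname]) returns [img]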
@ -74,8 +94,20 @@ Status IteratorConsumer::GetNextAsMap(std::unordered_map<std::string, TensorPtr>
// Populate the out map from the row and return it
for (const auto &colMap : tree_adapter_->GetColumnNameMap()) {
std::string column_name = colMap.first;
// Filter out meta columns whose names start with kDftMetaColumnPrefix
size_t pos = column_name.find(kDftMetaColumnPrefix);
if (pos != std::string::npos && pos == 0) {
continue;
}
(*out_map)[colMap.first] = std::move(res[colMap.second]);
}
if (out_map->size() == 0) {
std::string err_msg = "No effective column found; maybe all columns are meta columns and have been filtered out. ";
err_msg += "If you want to output a meta column, please rename it to a new name that does not start with ";
err_msg += "\"" + std::string(kDftMetaColumnPrefix) + "\"";
RETURN_STATUS_UNEXPECTED(err_msg);
}
return Status::OK();
}
@ -90,23 +122,28 @@ Status IteratorConsumer::GetNextAsOrderedPair(std::vector<std::pair<std::string,
size_t num_cols = curr_row.size(); // num_cols is non-empty.
// order the column names according to their ids
if (column_order_.empty()) {
const int32_t invalid_col_id = -1;
column_order_.resize(num_cols, {std::string(), invalid_col_id});
for (const auto &itr : tree_adapter_->GetColumnNameMap()) {
int32_t ind = itr.second;
CHECK_FAIL_RETURN_UNEXPECTED(ind < num_cols && ind >= 0, "column id out of bounds.");
column_order_[ind] = std::make_pair(itr.first, ind);
}
// error check, make sure the ids in col_name_id_map are continuous and starts from 0
for (const auto &col : column_order_) {
CHECK_FAIL_RETURN_UNEXPECTED(col.second != invalid_col_id, "column ids are not continuous.");
// Filter out meta columns whose names start with kDftMetaColumnPrefix
size_t pos = itr.first.find(kDftMetaColumnPrefix);
if (pos != std::string::npos && pos == 0) {
continue;
}
column_order_[ind] = itr.first;
}
}
vec->reserve(num_cols);
if (column_order_.size() == 0) {
std::string err_msg = "No effective column found; maybe all columns are meta columns and have been filtered out. ";
err_msg += "If you want to output a meta column, please rename it to a new name that does not start with ";
err_msg += "\"" + std::string(kDftMetaColumnPrefix) + "\"";
RETURN_STATUS_UNEXPECTED(err_msg);
}
vec->reserve(column_order_.size());
std::transform(column_order_.begin(), column_order_.end(), std::back_inserter(*vec),
[curr_row](const auto &col) { return std::make_pair(col.first, curr_row[col.second]); });
[curr_row](const auto &col) { return std::make_pair(col.second, curr_row[col.first]); });
return Status::OK();
}


@ -86,7 +86,7 @@ class IteratorConsumer : public TreeConsumer {
private:
int32_t num_epochs_;
std::vector<std::pair<std::string, int32_t>> column_order_; // key: column name, val: column id
std::map<int32_t, std::string> column_order_; // key: column id, val: column name
};
#ifndef ENABLE_ANDROID
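
Switching column_order_ from a vector of (name, id) pairs to std::map<int32_t, std::string> lets GetNextAsOrderedPair skip meta columns without leaving holes: a std::map iterates in ascending key order, so the surviving columns come out sorted by id. A rough Python analogue, assuming a name-to-id map:

name_id_map = {"image": 0, "target": 1, "_meta-filename": 2}
# Build id -> name, skipping meta columns (this is what the new column_order_ holds).
column_order = {col_id: name for name, col_id in name_id_map.items()
                if not name.startswith("_meta-")}
# std::map<int32_t, std::string> iterates keys in ascending order; sorted() emulates that.
ordered = [(column_order[col_id], col_id) for col_id in sorted(column_order)]
print(ordered)  # [('image', 0), ('target', 1)]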


@ -58,8 +58,20 @@ Status DatasetIterator::GetNextAsMap(TensorMap *out_map) {
// Populate the out map from the row and return it
for (const auto colMap : col_name_id_map_) {
std::string column_name = colMap.first;
// Filter out meta columns whose names start with kDftMetaColumnPrefix
size_t pos = column_name.find(kDftMetaColumnPrefix);
if (pos != std::string::npos && pos == 0) {
continue;
}
(*out_map)[colMap.first] = std::move(curr_row[colMap.second]);
}
if (out_map->size() == 0) {
std::string err_msg = "No effective column found; maybe all columns are meta columns and have been filtered out. ";
err_msg += "If you want to output a meta column, please rename it to a new name that does not start with ";
err_msg += "\"" + std::string(kDftMetaColumnPrefix) + "\"";
RETURN_STATUS_UNEXPECTED(err_msg);
}
return Status::OK();
}


@ -19,6 +19,7 @@
#include <algorithm>
#include <iostream>
#include <memory>
#include <unordered_map>
#include "minddata/dataset/engine/dataset_iterator.h"
#include "minddata/dataset/util/status.h"
@ -94,6 +95,33 @@ Status DeviceQueueOp::EoeReceived(int32_t worker_id) {
return Status::OK();
}
Status DeviceQueueOp::FilterMetadata(TensorRow *row) {
std::unordered_map<std::string, int32_t> current_name_id_map = child_[0]->column_name_id_map();
TensorRow output;
TensorRow tmp = *row;
std::vector<size_t> to_keep_indices;
for (auto column : current_name_id_map) {
std::string column_name = column.first;
// Filter out meta columns whose names start with kDftMetaColumnPrefix
size_t pos = column_name.find(kDftMetaColumnPrefix);
if (pos != std::string::npos && pos == 0) {
continue;
}
to_keep_indices.push_back(column.second);
}
if (to_keep_indices.size() == 0) {
std::string err_msg = "No effective column found; maybe all columns are meta columns and have been filtered out. ";
err_msg += "If you want to output a meta column, please rename it to a new name that does not start with ";
err_msg += "\"" + std::string(kDftMetaColumnPrefix) + "\"";
RETURN_STATUS_UNEXPECTED(err_msg);
}
std::sort(to_keep_indices.begin(), to_keep_indices.end());
(void)std::transform(to_keep_indices.begin(), to_keep_indices.end(), std::back_inserter(output),
[&tmp](const auto &it) { return std::move(tmp[it]); });
*row = std::move(output);
return Status::OK();
}
Status DeviceQueueOp::CheckExceptions(const TensorRow &row) const {
// this method checks if the row meets the conditions to be sent to TDT
for (const auto &item : row) {
@ -165,6 +193,7 @@ Status DeviceQueueOp::SendDataToAscend() {
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row));
while (!curr_row.eof() && !is_break_loop) {
while (!curr_row.eoe() && !is_break_loop) {
RETURN_IF_NOT_OK(FilterMetadata(&curr_row));
RETURN_IF_NOT_OK(CheckExceptions(curr_row));
WaitContinueSignal();
#ifdef ENABLE_DUMP_IR
@ -489,6 +518,7 @@ Status DeviceQueueOp::SendDataToGPU() {
bool is_break_loop = false;
while (!current_row.eof() && !is_break_loop && !GpuBufferMgr::GetInstance().IsClosed()) {
while (!current_row.eoe() && !is_break_loop && !GpuBufferMgr::GetInstance().IsClosed()) {
RETURN_IF_NOT_OK(FilterMetadata(&current_row));
RETURN_IF_NOT_OK(CheckExceptions(current_row));
RETURN_IF_NOT_OK(receive_queues_[num_buf++ % num_workers_]->Add(std::move(current_row)));
if (first_push_flag_ != true) {
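
FilterMetadata runs on every row before it is pushed to Ascend or GPU, so device sinks never see meta columns. A hedged usage sketch (device_que()/send() as in the Python sink API of this era; treat the exact names as an assumption and verify against your version):

import mindspore.dataset as ds

data = ds.VOCDataset("../data/dataset/testVOC2012", task="Segmentation", usage="train",
                     decode=True, extra_metadata=True)
# FilterMetadata strips "_meta-filename" before each row reaches the device,
# so the sink only ever receives the "image" and "target" columns.
sink = data.device_que()  # assumed sink API; not part of this diff
sink.send(1)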


@ -184,6 +184,10 @@ class DeviceQueueOp : public PipelineOp {
std::string Name() const override { return kDeviceQueueOp; }
private:
// Name: FilterMetadata(TensorRow *);
// Description: Automatically filter out metadata columns before sending to the device.
Status FilterMetadata(TensorRow *row);
// Name: checkExceptions(TensorRow);
// Description: Check whether the TensorRow meets the condition for performing DeviceQueueOp
Status CheckExceptions(const TensorRow &row) const;


@ -78,7 +78,7 @@ Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) {
}
*ptr = std::make_shared<VOCOp>(builder_task_type_, builder_usage_, builder_dir_, builder_labels_to_read_,
builder_num_workers_, builder_op_connector_size_, builder_decode_,
std::move(builder_schema_), std::move(builder_sampler_));
std::move(builder_schema_), std::move(builder_sampler_), false);
return Status::OK();
}
@ -96,14 +96,15 @@ Status VOCOp::Builder::SanityCheck() {
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
decode_(decode),
task_type_(task_type),
usage_(task_mode),
folder_path_(folder_path),
class_index_(class_index),
data_schema_(std::move(data_schema)) {
data_schema_(std::move(data_schema)),
extra_metadata_(extra_metadata) {
io_block_queues_.Init(num_workers_, queue_size);
}
@ -124,30 +125,37 @@ void VOCOp::Print(std::ostream &out, bool show_all) const {
Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
std::string image_id = image_ids_[row_id];
std::vector<std::string> path_list;
const std::string kImageFile =
folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
if (task_type_ == TaskType::Segmentation) {
std::shared_ptr<Tensor> image, target;
const std::string kImageFile =
folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
const std::string kTargetFile =
folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target));
(*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
trow->setPath({kImageFile, kTargetFile});
path_list = {kImageFile, kTargetFile};
} else if (task_type_ == TaskType::Detection) {
std::shared_ptr<Tensor> image;
TensorRow annotation;
const std::string kImageFile =
folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
const std::string kAnnotationFile =
folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
trow->setId(row_id);
trow->setPath({kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile});
trow->push_back(std::move(image));
trow->insert(trow->end(), annotation.begin(), annotation.end());
path_list = {kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile};
}
if (extra_metadata_) {
// VOCDataset now adds a new column named "_meta-filename".
std::shared_ptr<Tensor> filename;
RETURN_IF_NOT_OK(Tensor::CreateScalar(image_id, &filename));
trow->push_back(std::move(filename));
path_list.push_back(kImageFile);
}
trow->setPath(path_list);
return Status::OK();
}
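
With extra_metadata_ set, every row gains a trailing string scalar carrying the image id, and the image path is appended to the row's path list. The new tests below consume it after a rename, e.g.:

import mindspore.dataset as ds
import mindspore.dataset.text as text

data = ds.VOCDataset("../data/dataset/testVOC2012", task="Detection", usage="train",
                     shuffle=False, decode=True, extra_metadata=True)
data = data.rename("_meta-filename", "filename")  # expose the meta column
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(text.to_str(item["filename"]))  # the VOC image id, e.g. "32"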
@ -269,7 +277,9 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
}
object = object->NextSiblingElement("object");
}
if (annotation.size() > 0) annotation_map_[path] = annotation;
if (annotation.size() > 0) {
annotation_map_[path] = annotation;
}
return Status::OK();
}


@ -160,9 +160,10 @@ class VOCOp : public MappableLeafOp {
// @param bool decode - whether to decode images
// @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset
// @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read
// @param extra_metadata - flag to add extra meta-data to row
VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata);
// Destructor
~VOCOp() = default;
@ -259,6 +260,7 @@ class VOCOp : public MappableLeafOp {
TaskType task_type_;
std::string usage_;
std::unique_ptr<DataSchema> data_schema_;
bool extra_metadata_;
std::vector<std::string> image_ids_;
std::map<std::string, int32_t> class_index_;


@ -31,18 +31,20 @@ namespace dataset {
// Constructor for VOCNode
VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache)
std::shared_ptr<DatasetCache> cache, bool extra_metadata)
: MappableSourceNode(std::move(cache)),
dataset_dir_(dataset_dir),
task_(task),
usage_(usage),
class_index_(class_indexing),
decode_(decode),
sampler_(sampler) {}
sampler_(sampler),
extra_metadata_(extra_metadata) {}
std::shared_ptr<DatasetNode> VOCNode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
auto node = std::make_shared<VOCNode>(dataset_dir_, task_, usage_, class_index_, decode_, sampler, cache_);
auto node =
std::make_shared<VOCNode>(dataset_dir_, task_, usage_, class_index_, decode_, sampler, cache_, extra_metadata_);
return node;
}
@ -108,12 +110,18 @@ Status VOCNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
RETURN_IF_NOT_OK(schema->AddColumn(
ColDescriptor(std::string(kColumnTruncate), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
}
if (extra_metadata_) {
std::string meta_file_name = std::string(kDftMetaColumnPrefix) + std::string(kColumnFileName);
TensorShape scalar = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(schema->AddColumn(
ColDescriptor(meta_file_name, DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar)));
}
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
std::shared_ptr<VOCOp> voc_op;
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
decode_, std::move(schema), std::move(sampler_rt));
decode_, std::move(schema), std::move(sampler_rt), extra_metadata_);
voc_op->set_total_repeats(GetTotalRepeats());
voc_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(voc_op);


@ -32,7 +32,7 @@ class VOCNode : public MappableSourceNode {
/// \brief Constructor
VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache);
std::shared_ptr<DatasetCache> cache, bool extra_metadata = false);
/// \brief Destructor
~VOCNode() = default;
@ -97,12 +97,14 @@ class VOCNode : public MappableSourceNode {
const std::string kColumnLabel = "label";
const std::string kColumnDifficult = "difficult";
const std::string kColumnTruncate = "truncate";
const std::string kColumnFileName = "filename";
std::string dataset_dir_;
std::string task_;
std::string usage_;
std::map<std::string, int32_t> class_index_;
bool decode_;
std::shared_ptr<SamplerObj> sampler_;
bool extra_metadata_;
};
} // namespace dataset


@ -109,6 +109,7 @@ constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
constexpr int32_t kDftPrefetchSize = 20;
constexpr int32_t kDftNumConnections = 12;
constexpr int32_t kDftAutoNumWorkers = false;
constexpr char kDftMetaColumnPrefix[] = "_meta-";
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;
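
kDftMetaColumnPrefix is the sentinel every filter above matches with find(...) returning position 0, i.e. a starts-with test; a column whose name merely contains "_meta-" elsewhere is not filtered. In Python terms:

def is_meta_column(name):
    # Mirrors the C++ check: column_name.find(kDftMetaColumnPrefix) must return position 0.
    return name.startswith("_meta-")

assert is_meta_column("_meta-filename")
assert not is_meta_column("my_meta-filename")  # "_meta-" not at position 0, so it is kept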


@ -1588,14 +1588,16 @@ class VOCDataset : public Dataset {
public:
explicit VOCDataset(const std::vector<char> &dataset_dir, const std::vector<char> &task,
const std::vector<char> &usage, const std::map<std::vector<char>, int32_t> &class_indexing,
bool decode, const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
bool decode, const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache,
bool extra_metadata);
explicit VOCDataset(const std::vector<char> &dataset_dir, const std::vector<char> &task,
const std::vector<char> &usage, const std::map<std::vector<char>, int32_t> &class_indexing,
bool decode, const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache);
bool decode, const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache,
bool extra_metadata);
explicit VOCDataset(const std::vector<char> &dataset_dir, const std::vector<char> &task,
const std::vector<char> &usage, const std::map<std::vector<char>, int32_t> &class_indexing,
bool decode, const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache);
const std::shared_ptr<DatasetCache> &cache, bool extra_metadata);
~VOCDataset() = default;
};
@ -1613,14 +1615,16 @@ class VOCDataset : public Dataset {
/// given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \param[in] extra_metadata Flag to add extra meta-data to row. (default=false)
/// \return Shared pointer to the current Dataset
inline std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
const std::string &usage = "train",
const std::map<std::string, int32_t> &class_indexing = {}, bool decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
const std::shared_ptr<DatasetCache> &cache = nullptr) {
const std::shared_ptr<DatasetCache> &cache = nullptr,
bool extra_metadata = false) {
return std::make_shared<VOCDataset>(StringToChar(dataset_dir), StringToChar(task), StringToChar(usage),
MapStringToChar(class_indexing), decode, sampler, cache);
MapStringToChar(class_indexing), decode, sampler, cache, extra_metadata);
}
/// \brief Function to create a VOCDataset
@ -1635,13 +1639,15 @@ inline std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \param[in] extra_metadata Flag to add extra meta-data to row. (default=false)
/// \return Shared pointer to the current Dataset
inline std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task,
const std::string &usage, const std::map<std::string, int32_t> &class_indexing,
bool decode, const Sampler *sampler,
const std::shared_ptr<DatasetCache> &cache = nullptr) {
const std::shared_ptr<DatasetCache> &cache = nullptr,
bool extra_metadata = false) {
return std::make_shared<VOCDataset>(StringToChar(dataset_dir), StringToChar(task), StringToChar(usage),
MapStringToChar(class_indexing), decode, sampler, cache);
MapStringToChar(class_indexing), decode, sampler, cache, extra_metadata);
}
/// \brief Function to create a VOCDataset
@ -1656,13 +1662,15 @@ inline std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \param[in] extra_metadata Flag to add extra meta-data to row. (default=false)
/// \return Shared pointer to the current Dataset
inline std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task,
const std::string &usage, const std::map<std::string, int32_t> &class_indexing,
bool decode, const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache = nullptr) {
const std::shared_ptr<DatasetCache> &cache = nullptr,
bool extra_metadata = false) {
return std::make_shared<VOCDataset>(StringToChar(dataset_dir), StringToChar(task), StringToChar(usage),
MapStringToChar(class_indexing), decode, sampler, cache);
MapStringToChar(class_indexing), decode, sampler, cache, extra_metadata);
}
std::shared_ptr<DatasetCache> CreateDatasetCacheCharIF(session_id_type id, uint64_t mem_sz, bool spill,


@ -4288,8 +4288,8 @@ class VOCDataset(MappableDataset):
The generated dataset has multiple columns :
- task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32],
['difficult', dtype=uint32], ['truncate', dtype=uint32]].
- task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32],
['label', dtype=uint32], ['difficult', dtype=uint32], ['truncate', dtype=uint32]].
- task='Segmentation', column: [['image', dtype=uint8], ['target',dtype=uint8]].
This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table
@ -4346,7 +4346,10 @@ class VOCDataset(MappableDataset):
dataset_dir (str): Path to the root directory that contains the dataset.
task (str): Set the task type of reading VOC data; only "Segmentation" or "Detection" is supported now
(default="Segmentation").
usage (str): The type of data list text file to be read (default="train").
usage (str): Set the usage of ImageSets (default="train"). If task is "Segmentation", the image and annotation
list will be loaded from ./ImageSets/Segmentation/usage + ".txt"; if task is "Detection", the image and
annotation list will be loaded from ./ImageSets/Main/usage + ".txt"; if neither task nor usage is set, the image
and annotation list will be loaded from ./ImageSets/Segmentation/train.txt by default.
class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in
"Detection" task (default=None, the folder names will be sorted alphabetically and each
class will be given a unique index starting from 0).
@ -4366,6 +4369,11 @@ class VOCDataset(MappableDataset):
argument can only be specified when num_shards is also specified.
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
(default=None, which means no cache is used).
extra_metadata (bool, optional): Flag to add extra metadata to the row. If True, an additional column
['_meta-filename', dtype=string] will be output at the end (default=False).
Note:
'_meta-filename' will not be output unless an explicit rename dataset op is added to remove the prefix ('_meta-').
Raises:
RuntimeError: If xml of Annotations is an invalid format.
@ -4404,7 +4412,7 @@ class VOCDataset(MappableDataset):
@check_vocdataset
def __init__(self, dataset_dir, task="Segmentation", usage="train", class_indexing=None, num_samples=None,
num_parallel_workers=None, shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None,
cache=None):
cache=None, extra_metadata=False):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
self.dataset_dir = dataset_dir
@ -4412,9 +4420,11 @@ class VOCDataset(MappableDataset):
self.usage = replace_none(usage, "train")
self.class_indexing = replace_none(class_indexing, {})
self.decode = replace_none(decode, False)
self.extra_metadata = extra_metadata
def parse(self, children=None):
return cde.VOCNode(self.dataset_dir, self.task, self.usage, self.class_indexing, self.decode, self.sampler)
return cde.VOCNode(self.dataset_dir, self.task, self.usage, self.class_indexing, self.decode, self.sampler,
self.extra_metadata)
def get_class_indexing(self):
"""

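Putting the Note above into practice: the meta column is part of the schema but filtered from iterator output until it is renamed. A sketch consistent with the new tests (ds and DATA_DIR as in the test file below):

data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                     decode=True, shuffle=False, extra_metadata=True)
assert data.get_col_names() == ["image", "target", "_meta-filename"]
# Iterators emit only 2 columns until the meta column is renamed:
data = data.rename("_meta-filename", "filename")
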

@ -310,7 +310,8 @@ TEST_F(MindDataTestPipeline, TestRandomCropSuccess) {
std::shared_ptr<TensorTransform> random_crop6(new vision::RandomCrop({20, 20}, {10, 10, 10, 10}, false, {4, 4, 4}));
// Create a Map operation on ds
ds = ds->Map({random_crop, random_crop1, random_crop2, random_crop3, random_crop4, random_crop5, random_crop6});
ds = ds->Map({random_crop, random_crop1, random_crop2, random_crop3, random_crop4, random_crop5, random_crop6},
{"image"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset


@ -13,17 +13,21 @@
# limitations under the License.
# ==============================================================================
import mindspore.dataset as ds
import mindspore.dataset.text as text
import mindspore.dataset.vision.c_transforms as vision
DATA_DIR = "../data/dataset/testVOC2012"
IMAGE_ID = ["32", "33", "39", "42", "61", "63", "68", "121", "123", "129"]
IMAGE_SHAPE = [2268, 2268, 2268, 2268, 642, 607, 561, 596, 612, 2268]
TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680]
def test_voc_segmentation():
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True, extra_metadata=True)
data1 = data1.rename("_meta-filename", "filename")
num = 0
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
assert text.to_str(item["filename"]) == IMAGE_ID[num]
assert item["image"].shape[0] == IMAGE_SHAPE[num]
assert item["target"].shape[0] == TARGET_SHAPE[num]
num += 1
@ -31,10 +35,12 @@ def test_voc_segmentation():
def test_voc_detection():
data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True, extra_metadata=True)
data1 = data1.rename("_meta-filename", "filename")
num = 0
count = [0, 0, 0, 0, 0, 0]
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
assert text.to_str(item["filename"]) == IMAGE_ID[num]
assert item["image"].shape[0] == IMAGE_SHAPE[num]
for label in item["label"]:
count[label[0]] += 1
@ -80,6 +86,53 @@ def test_voc_get_class_indexing():
assert count == [3, 2, 1, 2, 4, 3]
def test_voc_meta_column():
# scenario one: output 2 columns when the meta column is not renamed
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
num = 0
for item in data1.create_tuple_iterator():
assert len(item) == 2
num += 1
# scenario two: map input_columns == output_columns
def pyfunc1(img, label):
return img, label
data2 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data2 = data2.map(operations=pyfunc1, input_columns=["image", "target"])
data2 = data2.rename("_meta-filename", "filename")
num = 0
for item in data2.create_tuple_iterator(output_numpy=True):
assert text.to_str(item[2]) == IMAGE_ID[num]
num += 1
# scenario three: map input_columns != output_columns
def pyfunc2(img, label):
return img, img, label
data3 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data3 = data3.map(operations=pyfunc2, input_columns=["image", "target"], output_columns=["img1", "img2", "label"],
column_order=["_meta-filename", "img1", "img2", "label"])
data3 = data3.rename("_meta-filename", "filename")
num = 0
for item in data3.create_tuple_iterator(output_numpy=True):
assert text.to_str(item[0]) == IMAGE_ID[num]
num += 1
# scenario four: map input_columns != output_columns
def pyfunc3(img, label):
return img
data4 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
data4 = data4.map(operations=pyfunc3, input_columns=["image", "target"], output_columns=["img1"],
column_order=["_meta-filename", "img1"])
data4 = data4.rename("_meta-filename", "filename")
num = 0
for item in data4.create_tuple_iterator(output_numpy=True):
assert text.to_str(item[0]) == IMAGE_ID[num]
num += 1
def test_case_0():
data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True)
@ -93,7 +146,7 @@ def test_case_0():
data1 = data1.batch(batch_size, drop_remainder=True)
num = 0
for _ in data1.create_dict_iterator(num_epochs=1):
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
num += 1
assert num == 20
@ -110,7 +163,7 @@ def test_case_1():
data1 = data1.batch(batch_size, drop_remainder=True, pad_info={})
num = 0
for _ in data1.create_dict_iterator(num_epochs=1):
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
num += 1
assert num == 18
@ -122,12 +175,12 @@ def test_case_2():
dataset1, dataset2 = data1.split(sizes=sizes, randomize=randomize)
num_iter = 0
for _ in dataset1.create_dict_iterator(num_epochs=1):
for _ in dataset1.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1
assert num_iter == 5
num_iter = 0
for _ in dataset2.create_dict_iterator(num_epochs=1):
for _ in dataset2.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1
assert num_iter == 5
@ -135,7 +188,7 @@ def test_case_2():
def test_voc_exception():
try:
data1 = ds.VOCDataset(DATA_DIR, task="InvalidTask", usage="train", decode=True)
for _ in data1.create_dict_iterator(num_epochs=1):
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except ValueError:
@ -143,7 +196,7 @@ def test_voc_exception():
try:
data2 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", class_indexing={"cat": 0}, decode=True)
for _ in data2.create_dict_iterator(num_epochs=1):
for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except ValueError:
@ -151,7 +204,7 @@ def test_voc_exception():
try:
data3 = ds.VOCDataset(DATA_DIR, task="Detection", usage="notexist", decode=True)
for _ in data3.create_dict_iterator(num_epochs=1):
for _ in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except ValueError:
@ -159,7 +212,7 @@ def test_voc_exception():
try:
data4 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlnotexist", decode=True)
for _ in data4.create_dict_iterator(num_epochs=1):
for _ in data4.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except RuntimeError:
@ -167,7 +220,7 @@ def test_voc_exception():
try:
data5 = ds.VOCDataset(DATA_DIR, task="Detection", usage="invalidxml", decode=True)
for _ in data5.create_dict_iterator(num_epochs=1):
for _ in data5.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except RuntimeError:
@ -175,7 +228,7 @@ def test_voc_exception():
try:
data6 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlnoobject", decode=True)
for _ in data6.create_dict_iterator(num_epochs=1):
for _ in data6.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except RuntimeError:
@ -195,7 +248,7 @@ def test_voc_exception():
try:
data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -205,7 +258,7 @@ def test_voc_exception():
data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -214,7 +267,7 @@ def test_voc_exception():
try:
data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
data = data.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -223,7 +276,7 @@ def test_voc_exception():
try:
data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
data = data.map(operations=exception_func, input_columns=["difficult"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -232,7 +285,7 @@ def test_voc_exception():
try:
data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
data = data.map(operations=exception_func, input_columns=["truncate"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -241,7 +294,7 @@ def test_voc_exception():
try:
data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -251,7 +304,7 @@ def test_voc_exception():
data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -260,7 +313,7 @@ def test_voc_exception():
try:
data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
data = data.map(operations=exception_func, input_columns=["target"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -270,7 +323,7 @@ def test_voc_exception():
data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
data = data.map(operations=vision.Decode(), input_columns=["target"], num_parallel_workers=1)
data = data.map(operations=exception_func, input_columns=["target"], num_parallel_workers=1)
for _ in data.__iter__():
for _ in data.create_dict_iterator(output_numpy=True):
pass
assert False
except RuntimeError as e:
@ -291,6 +344,7 @@ if __name__ == '__main__':
test_voc_detection()
test_voc_class_index()
test_voc_get_class_indexing()
test_voc_meta_column()
test_case_0()
test_case_1()
test_case_2()


@ -151,6 +151,8 @@ def test_get_column_name_to_device():
def test_get_column_name_voc():
data = ds.VOCDataset(VOC_DIR, task="Segmentation", usage="train", decode=True, shuffle=False)
assert data.get_col_names() == ["image", "target"]
data = ds.VOCDataset(VOC_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
assert data.get_col_names() == ["image", "target", "_meta-filename"]
def test_get_column_name_project():


@ -269,7 +269,7 @@ def test_voc_sampler_chain():
assert data1_size == 5
# Verify number of rows
assert sum([1 for _ in data1]) == 5
assert sum([1 for _ in data1.create_dict_iterator(output_numpy=True)]) == 5
# Verify dataset contents
res = []