!19764 [MD] Remove Builder class phase 3

Merge pull request !19764 from harshvardhangupta/rem-build-2
This commit is contained in:
i-robot 2021-07-12 13:25:34 +00:00 committed by Gitee
commit 884f5ae1de
14 changed files with 103 additions and 892 deletions

View File

@ -48,79 +48,6 @@ const char kJsonCategoriesName[] = "name";
const float kDefaultPadValue = -1.0; const float kDefaultPadValue = -1.0;
const unsigned int kPadValueZero = 0; const unsigned int kPadValueZero = 0;
// Constructor for Builder class of CocoOp.
// Starts with decoding switched off, no sampler and the Detection task; the worker count and
// op connector size are read from the global ConfigManager.
CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
  builder_task_type_ = TaskType::Detection;
  const std::shared_ptr<ConfigManager> global_cfg = GlobalContext::config_manager();
  builder_num_workers_ = global_cfg->num_parallel_workers();
  builder_op_connector_size_ = global_cfg->op_connector_size();
}
// The builder "Build" method: checks the collected settings, assembles the column schema that
// matches the selected task and creates the CocoOp.
// @param std::shared_ptr<CocoOp> *ptr - receives the newly created op
// @return Status - the first failure reported by SanityCheck() or AddColumn(), an unexpected-status
//     error when the task type matches no case below, otherwise OK
Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) {
  RETURN_IF_NOT_OK(SanityCheck());

  // No sampler was supplied: default to a SequentialSamplerRT built with (0, 0).
  if (!builder_sampler_) {
    const int64_t first_index = 0;
    const int64_t sample_count = 0;
    builder_sampler_ = std::make_shared<SequentialSamplerRT>(first_index, sample_count);
  }

  builder_schema_ = std::make_unique<DataSchema>();
  // Every column uses the same TensorImpl::kFlexible / 1 descriptor arguments; only the name and
  // the element type differ, so one local helper registers them all.
  auto add_column = [this](const std::string &col_name, const DataType &col_type) -> Status {
    return builder_schema_->AddColumn(ColDescriptor(col_name, col_type, TensorImpl::kFlexible, 1));
  };

  // The image column is always first, followed by the task-specific annotation columns.
  RETURN_IF_NOT_OK(add_column(kColumnImage, DataType(DataType::DE_UINT8)));
  switch (builder_task_type_) {
    case TaskType::Detection:
      RETURN_IF_NOT_OK(add_column(kJsonAnnoBbox, DataType(DataType::DE_FLOAT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoCategoryId, DataType(DataType::DE_UINT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoIscrowd, DataType(DataType::DE_UINT32)));
      break;
    case TaskType::Stuff:
      RETURN_IF_NOT_OK(add_column(kJsonAnnoSegmentation, DataType(DataType::DE_FLOAT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoIscrowd, DataType(DataType::DE_UINT32)));
      break;
    case TaskType::Keypoint:
      RETURN_IF_NOT_OK(add_column(kJsonAnnoKeypoints, DataType(DataType::DE_FLOAT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoNumKeypoints, DataType(DataType::DE_UINT32)));
      break;
    case TaskType::Panoptic:
      RETURN_IF_NOT_OK(add_column(kJsonAnnoBbox, DataType(DataType::DE_FLOAT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoCategoryId, DataType(DataType::DE_UINT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoIscrowd, DataType(DataType::DE_UINT32)));
      RETURN_IF_NOT_OK(add_column(kJsonAnnoArea, DataType(DataType::DE_UINT32)));
      break;
    default:
      RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
  }

  // The schema and sampler are handed over to the op; the builder no longer owns them afterwards.
  *ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_,
                                  builder_op_connector_size_, builder_decode_, std::move(builder_schema_),
                                  std::move(builder_sampler_), false);
  return Status::OK();
}
// Check validity of the builder's input args.
// Each violated rule appends one line to the message, so a single call reports every problem.
// @return Status - OK when the image folder is a directory, the annotation file exists and
//     num_parallel_workers is greater than 0; otherwise kMDUnexpectedError carrying the messages
Status CocoOp::Builder::SanityCheck() {
  Path dir(builder_dir_);
  Path file(builder_file_);
  std::string err_msg;
  if (!dir.IsDirectory()) {
    err_msg += "Invalid parameter, Coco image folder path is invalid or not set, path: " + builder_dir_ + ".\n";
  }
  if (!file.Exists()) {
    // Echo the annotation file path (not the image folder) so the message names the offending argument.
    err_msg += "Invalid parameter, Coco annotation JSON path is invalid or not set, path: " + builder_file_ + ".\n";
  }
  if (builder_num_workers_ <= 0) {
    err_msg += "Invalid parameter, num_parallel_workers must be greater than 0, but got " +
               std::to_string(builder_num_workers_) + ".\n";
  }
  return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, bool extra_metadata) std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
@ -568,21 +495,9 @@ Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &c
return Status::OK(); return Status::OK();
} }
Status CocoOp::CountTotalRows(const std::string &dir, const std::string &file, const std::string &task, Status CocoOp::CountTotalRows(int64_t *count) {
int64_t *count) { RETURN_IF_NOT_OK(ParseAnnotationIds());
std::shared_ptr<CocoOp> op; *count = static_cast<int64_t>(image_ids_.size());
RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op));
RETURN_IF_NOT_OK(op->ParseAnnotationIds());
*count = static_cast<int64_t>(op->image_ids_.size());
return Status::OK();
}
Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file, const std::string &task,
std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
std::shared_ptr<CocoOp> op;
RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op));
RETURN_IF_NOT_OK(op->ParseAnnotationIds());
*output_class_indexing = op->label_index_;
return Status::OK(); return Status::OK();
} }
@ -604,19 +519,8 @@ Status CocoOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<i
MS_LOG(ERROR) << "Class index only valid in \"Detection\" and \"Panoptic\" task."; MS_LOG(ERROR) << "Class index only valid in \"Detection\" and \"Panoptic\" task.";
RETURN_STATUS_UNEXPECTED("GetClassIndexing: Get Class Index failed in CocoOp."); RETURN_STATUS_UNEXPECTED("GetClassIndexing: Get Class Index failed in CocoOp.");
} }
std::shared_ptr<CocoOp> op; RETURN_IF_NOT_OK(ParseAnnotationIds());
std::string task_type; for (const auto &label : label_index_) {
switch (task_type_) {
case TaskType::Detection:
task_type = "Detection";
break;
case TaskType::Panoptic:
task_type = "Panoptic";
break;
}
RETURN_IF_NOT_OK(Builder().SetDir(image_folder_path_).SetFile(annotation_path_).SetTask(task_type).Build(&op));
RETURN_IF_NOT_OK(op->ParseAnnotationIds());
for (const auto label : op->label_index_) {
(*output_class_indexing).emplace_back(std::make_pair(label.first, label.second)); (*output_class_indexing).emplace_back(std::make_pair(label.first, label.second));
} }
} }

View File

@ -168,21 +168,8 @@ class CocoOp : public MappableLeafOp {
// @param show_all // @param show_all
void Print(std::ostream &out, bool show_all) const override; void Print(std::ostream &out, bool show_all) const override;
// @param const std::string &dir - Coco image dir path
// @param const std::string &file - Coco json file path
// @param const std::string &task - task mode of Coco task
// @param int64_t numSamples - samples number of CocoDataset
// @param int64_t *count - output rows number of CocoDataset // @param int64_t *count - output rows number of CocoDataset
static Status CountTotalRows(const std::string &dir, const std::string &task_type, const std::string &task_mode, Status CountTotalRows(int64_t *count);
int64_t *count);
// @param const std::string &dir - Coco image dir path
// @param const std::string &file - Coco json file path
// @param const std::string &task - task mode of Coco task
// @param int64_t numSamples - samples number of CocoDataset
// @param std::map<std::string, int32_t> *output_class_indexing - output class index of CocoDataset
static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode,
std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing);
// Op name getter // Op name getter
// @return Name of the current Op // @return Name of the current Op

View File

@ -619,9 +619,9 @@ Status CsvOp::CountAllFileRows(const std::vector<std::string> &files, bool csv_h
int32_t num_workers = GlobalContext::config_manager()->num_parallel_workers(); int32_t num_workers = GlobalContext::config_manager()->num_parallel_workers();
int32_t op_connector_size = GlobalContext::config_manager()->op_connector_size(); int32_t op_connector_size = GlobalContext::config_manager()->op_connector_size();
int32_t worker_connector_size = GlobalContext::config_manager()->worker_connector_size(); int32_t worker_connector_size = GlobalContext::config_manager()->worker_connector_size();
int32_t device_id = 0; const int32_t device_id = 0;
int32_t num_devices = 1; const int32_t num_devices = 1;
int32_t num_samples = 0; const int64_t num_samples = 0;
bool shuffle_files = false; bool shuffle_files = false;
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_list; std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_list;
std::vector<std::string> column_name_list; std::vector<std::string> column_name_list;
@ -629,7 +629,7 @@ Status CsvOp::CountAllFileRows(const std::vector<std::string> &files, bool csv_h
std::shared_ptr<CsvOp> op; std::shared_ptr<CsvOp> op;
*count = 0; *count = 0;
if (!csv_header) { if (!csv_header) {
column_name_list.push_back(""); column_name_list.emplace_back("");
} }
op = std::make_shared<CsvOp>(files, field_delim, column_list, column_name_list, num_workers, num_samples, op = std::make_shared<CsvOp>(files, field_delim, column_list, column_name_list, num_workers, num_samples,
worker_connector_size, op_connector_size, shuffle_files, num_devices, device_id); worker_connector_size, op_connector_size, shuffle_files, num_devices, device_id);

View File

@ -31,39 +31,6 @@
namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {
// Constructor for Builder class of ManifestOp.
// Starts with no sampler and decoding switched off; the worker count and op connector size are
// read from the global ConfigManager.
ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_decode_(false) {
  const std::shared_ptr<ConfigManager> global_cfg = GlobalContext::config_manager();
  builder_num_workers_ = global_cfg->num_parallel_workers();
  builder_op_connector_size_ = global_cfg->op_connector_size();
}
// The builder "build" method: checks the collected settings, creates the two-column
// ("image", "label") schema and constructs the ManifestOp.
// @param std::shared_ptr<ManifestOp> *ptr - receives the newly created op
// @return Status - the first failure reported by SanityCheck() or AddColumn(), otherwise OK
Status ManifestOp::Builder::Build(std::shared_ptr<ManifestOp> *ptr) {
  RETURN_IF_NOT_OK(SanityCheck());

  // No sampler was supplied: default to a SequentialSamplerRT built with (0, 0).
  if (!builder_sampler_) {
    const int64_t first_index = 0;
    const int64_t sample_count = 0;
    builder_sampler_ = std::make_shared<SequentialSamplerRT>(first_index, sample_count);
  }

  builder_schema_ = std::make_unique<DataSchema>();
  // Both columns share the TensorImpl::kFlexible / 1 descriptor arguments.
  auto add_column = [this](const char *col_name, const DataType &col_type) -> Status {
    return builder_schema_->AddColumn(ColDescriptor(col_name, col_type, TensorImpl::kFlexible, 1));
  };
  RETURN_IF_NOT_OK(add_column("image", DataType(DataType::DE_UINT8)));
  RETURN_IF_NOT_OK(add_column("label", DataType(DataType::DE_UINT32)));

  // The schema and sampler are handed over to the op; the builder no longer owns them afterwards.
  *ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_file_, builder_op_connector_size_, builder_decode_,
                                      builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_),
                                      builder_usage_);
  return Status::OK();
}
// Check validity of input args.
// Each violated rule appends one line to the message, so a single call reports every problem.
// @return Status - OK when a manifest file is set and num_parallel_workers is greater than 0;
//     otherwise kMDUnexpectedError carrying the collected messages
Status ManifestOp::Builder::SanityCheck() {
  std::string err_msg;
  if (builder_file_.empty()) {
    err_msg += "Invalid parameter, Manifest file is not set.\n";
  }
  if (builder_num_workers_ <= 0) {
    err_msg += "Invalid parameter, num_parallel_workers must be greater than 0, but got " +
               std::to_string(builder_num_workers_) + ".\n";
  }
  if (err_msg.empty()) {
    return Status::OK();
  }
  return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
ManifestOp::ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode, ManifestOp::ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode,
const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema, const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, std::string usage) std::shared_ptr<SamplerRT> sampler, std::string usage)
@ -296,45 +263,13 @@ Status ManifestOp::CountDatasetInfo() {
return Status::OK(); return Status::OK();
} }
Status ManifestOp::CountTotalRows(const std::string &file, const std::map<std::string, int32_t> &map, Status ManifestOp::CountTotalRows(int64_t *count) {
const std::string &usage, int64_t *count, int64_t *numClasses) {
// the logic of counting the number of samples is copied from ParseManifestFile()
std::shared_ptr<ManifestOp> op;
*count = 0; *count = 0;
RETURN_IF_NOT_OK(Builder().SetManifestFile(file).SetClassIndex(map).SetUsage(usage).Build(&op)); RETURN_IF_NOT_OK(ParseManifestFile());
RETURN_IF_NOT_OK(op->ParseManifestFile()); *count = static_cast<int64_t>(image_labelname_.size());
*numClasses = static_cast<int64_t>(op->label_index_.size());
*count = static_cast<int64_t>(op->image_labelname_.size());
return Status::OK(); return Status::OK();
} }
#ifdef ENABLE_PYTHON
// Get str-to-int mapping from label name to index.
// @param const std::string &file - manifest file handed to the builder
// @param const py::dict &dict - user supplied class index; when it is not empty it is returned as is
// @param const std::string &usage - dataset usage handed to the builder
// @param std::map<std::string, int32_t> *output_class_indexing - receives the mapping
// @return Status - failure from building or parsing the manifest, otherwise OK
Status ManifestOp::GetClassIndexing(const std::string &file, const py::dict &dict, const std::string &usage,
                                    std::map<std::string, int32_t> *output_class_indexing) {
  std::map<std::string, int32_t> input_class_indexing;
  for (auto p : dict) {
    (void)input_class_indexing.insert(std::pair<std::string, int32_t>(py::reinterpret_borrow<py::str>(p.first),
                                                                      py::reinterpret_borrow<py::int_>(p.second)));
  }

  // A user supplied mapping wins; the local copy is not needed afterwards, so move it out.
  if (!input_class_indexing.empty()) {
    *output_class_indexing = std::move(input_class_indexing);
    return Status::OK();
  }

  // Nothing supplied: parse the manifest with a temporary op and number its labels 0, 1, 2, ...
  // in the iteration order of label_index_.
  std::shared_ptr<ManifestOp> op;
  RETURN_IF_NOT_OK(Builder().SetManifestFile(file).SetClassIndex(input_class_indexing).SetUsage(usage).Build(&op));
  RETURN_IF_NOT_OK(op->ParseManifestFile());
  RETURN_IF_NOT_OK(op->CountDatasetInfo());
  // The counter has the map's value type, so no implicit unsigned-to-signed conversion happens on insert.
  int32_t count = 0;
  // Bind by reference: iterating by value would copy every entry.
  for (const auto &label : op->label_index_) {
    (void)output_class_indexing->insert(std::make_pair(label.first, count));
    ++count;
  }
  return Status::OK();
}
#endif
Status ManifestOp::ComputeColMap() { Status ManifestOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
@ -354,10 +289,8 @@ Status ManifestOp::GetNumClasses(int64_t *num_classes) {
return Status::OK(); return Status::OK();
} }
int64_t classes_count; int64_t classes_count;
std::shared_ptr<ManifestOp> op; RETURN_IF_NOT_OK(ParseManifestFile());
RETURN_IF_NOT_OK(Builder().SetManifestFile(file_).SetClassIndex(class_index_).SetUsage(usage_).Build(&op)); classes_count = static_cast<int64_t>(label_index_.size());
RETURN_IF_NOT_OK(op->ParseManifestFile());
classes_count = static_cast<int64_t>(op->label_index_.size());
*num_classes = classes_count; *num_classes = classes_count;
num_classes_ = classes_count; num_classes_ = classes_count;
return Status::OK(); return Status::OK();
@ -365,12 +298,10 @@ Status ManifestOp::GetNumClasses(int64_t *num_classes) {
Status ManifestOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) { Status ManifestOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
if ((*output_class_indexing).empty()) { if ((*output_class_indexing).empty()) {
std::shared_ptr<ManifestOp> op; RETURN_IF_NOT_OK(ParseManifestFile());
RETURN_IF_NOT_OK(Builder().SetManifestFile(file_).SetClassIndex(class_index_).SetUsage(usage_).Build(&op)); RETURN_IF_NOT_OK(CountDatasetInfo());
RETURN_IF_NOT_OK(op->ParseManifestFile()); int32_t count = 0;
RETURN_IF_NOT_OK(op->CountDatasetInfo()); for (const auto &label : label_index_) {
uint32_t count = 0;
for (const auto label : op->label_index_) {
if (!class_index_.empty()) { if (!class_index_.empty()) {
(*output_class_indexing) (*output_class_indexing)
.emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first]))); .emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first])));

View File

@ -38,92 +38,6 @@ namespace mindspore {
namespace dataset { namespace dataset {
class ManifestOp : public MappableLeafOp { class ManifestOp : public MappableLeafOp {
public: public:
// Builder for ManifestOp: collects the settings through chained setters and creates the op in Build().
class Builder {
 public:
  // Constructor for Builder class of ManifestOp
  Builder();

  // Destructor
  ~Builder() = default;

  // Setter method
  // @param int32_t size - op connector size
  // @return Builder setter method returns reference to the builder.
  Builder &SetOpConnectorSize(int32_t size) {
    builder_op_connector_size_ = size;
    return *this;
  }

  // Setter method
  // @param const std::map<std::string, int32_t>& map - a class name to label map
  // @return Builder setter method returns reference to the builder.
  Builder &SetClassIndex(const std::map<std::string, int32_t> &map) {
    builder_labels_to_read_ = map;
    return *this;
  }

  // Setter method
  // @param bool do_decode
  // @return Builder setter method returns reference to the builder.
  Builder &SetDecode(bool do_decode) {
    builder_decode_ = do_decode;
    return *this;
  }

  // Setter method
  // @param int32_t num_workers
  // @return Builder setter method returns reference to the builder.
  Builder &SetNumWorkers(int32_t num_workers) {
    builder_num_workers_ = num_workers;
    return *this;
  }

  // Setter method
  // @param std::shared_ptr<SamplerRT> sampler
  // @return Builder setter method returns reference to the builder.
  Builder &SetSampler(std::shared_ptr<SamplerRT> sampler) {
    builder_sampler_ = std::move(sampler);
    return *this;
  }

  // Setter method
  // @param const std::string &file - manifest file
  // @return Builder setter method returns reference to the builder.
  Builder &SetManifestFile(const std::string &file) {
    builder_file_ = file;
    return *this;
  }

  // Setter method
  // @param const std::string &usage - dataset usage
  // @return Builder setter method returns reference to the builder.
  Builder &SetUsage(const std::string &usage) {
    builder_usage_ = usage;
    return *this;
  }

  // Check validity of input args
  // @return Status The status code returned
  Status SanityCheck();

  // The builder "build" method creates the final object.
  // @param std::shared_ptr<ManifestOp> *op - DatasetOp
  // @return Status The status code returned
  Status Build(std::shared_ptr<ManifestOp> *op);

 private:
  std::shared_ptr<SamplerRT> builder_sampler_;
  bool builder_decode_;
  std::string builder_file_;
  int32_t builder_num_workers_;
  // No setter or constructor in this class assigns this member, so give it a defined value here.
  int32_t builder_rows_per_buffer_ = 0;
  int32_t builder_op_connector_size_;
  std::unique_ptr<DataSchema> builder_schema_;
  std::string builder_usage_;
  std::map<std::string, int32_t> builder_labels_to_read_;
};
// Constructor // Constructor
// @param int32_t num_works - Num of workers reading images in parallel // @param int32_t num_works - Num of workers reading images in parallel
// @param std::string - file list of Manifest // @param std::string - file list of Manifest
@ -146,20 +60,9 @@ class ManifestOp : public MappableLeafOp {
void Print(std::ostream &out, bool show_all) const override; void Print(std::ostream &out, bool show_all) const override;
/// \brief Counts the total number of rows in Manifest /// \brief Counts the total number of rows in Manifest
/// \param[in] file Dataset file path
/// \param[in] input_class_indexing Input map of class index
/// \param[in] usage Dataset usage
/// \param[out] count Number of rows counted /// \param[out] count Number of rows counted
/// \param[out] numClasses Number of classes counted
/// \return Status of the function /// \return Status of the function
static Status CountTotalRows(const std::string &file, const std::map<std::string, int32_t> &map, Status CountTotalRows(int64_t *count);
const std::string &usage, int64_t *count, int64_t *numClasses);
#ifdef ENABLE_PYTHON
// Get str-to-int mapping from label name to index
static Status GetClassIndexing(const std::string &file, const py::dict &dict, const std::string &usage,
std::map<std::string, int32_t> *output_class_indexing);
#endif
// Op name getter // Op name getter
// @return Name of the current Op // @return Name of the current Op

View File

@ -45,56 +45,6 @@ const char kSegmentationExtension[] = ".png";
const char kAnnotationExtension[] = ".xml"; const char kAnnotationExtension[] = ".xml";
const char kImageSetsExtension[] = ".txt"; const char kImageSetsExtension[] = ".txt";
// Constructor for Builder class of VOCOp.
// Starts with decoding switched off, no sampler and the Segmentation task; the worker count and
// op connector size are read from the global ConfigManager.
VOCOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
  builder_task_type_ = TaskType::Segmentation;
  const std::shared_ptr<ConfigManager> global_cfg = GlobalContext::config_manager();
  builder_num_workers_ = global_cfg->num_parallel_workers();
  builder_op_connector_size_ = global_cfg->op_connector_size();
}
// The builder "Build" method: checks the collected settings, assembles the column schema that
// matches the selected task and creates the VOCOp.
// @param std::shared_ptr<VOCOp> *ptr - receives the newly created op
// @return Status - the first failure reported by SanityCheck() or AddColumn(), otherwise OK
Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) {
  RETURN_IF_NOT_OK(SanityCheck());

  // No sampler was supplied: default to a SequentialSamplerRT built with (0, 0).
  if (!builder_sampler_) {
    const int64_t first_index = 0;
    const int64_t sample_count = 0;
    builder_sampler_ = std::make_shared<SequentialSamplerRT>(first_index, sample_count);
  }

  builder_schema_ = std::make_unique<DataSchema>();
  // Every column uses the same TensorImpl::kFlexible / 1 descriptor arguments; only the name and
  // the element type differ, so one local helper registers them all.
  auto add_column = [this](const std::string &col_name, const DataType &col_type) -> Status {
    return builder_schema_->AddColumn(ColDescriptor(col_name, col_type, TensorImpl::kFlexible, 1));
  };

  switch (builder_task_type_) {
    case TaskType::Segmentation:
      RETURN_IF_NOT_OK(add_column(kColumnImage, DataType(DataType::DE_UINT8)));
      RETURN_IF_NOT_OK(add_column(kColumnTarget, DataType(DataType::DE_UINT8)));
      break;
    case TaskType::Detection:
      RETURN_IF_NOT_OK(add_column(kColumnImage, DataType(DataType::DE_UINT8)));
      RETURN_IF_NOT_OK(add_column(kColumnBbox, DataType(DataType::DE_FLOAT32)));
      RETURN_IF_NOT_OK(add_column(kColumnLabel, DataType(DataType::DE_UINT32)));
      RETURN_IF_NOT_OK(add_column(kColumnDifficult, DataType(DataType::DE_UINT32)));
      RETURN_IF_NOT_OK(add_column(kColumnTruncate, DataType(DataType::DE_UINT32)));
      break;
  }

  // The schema and sampler are handed over to the op; the builder no longer owns them afterwards.
  *ptr = std::make_shared<VOCOp>(builder_task_type_, builder_usage_, builder_dir_, builder_labels_to_read_,
                                 builder_num_workers_, builder_op_connector_size_, builder_decode_,
                                 std::move(builder_schema_), std::move(builder_sampler_), false);
  return Status::OK();
}
// Check validity of input args.
// Each violated rule appends one line to the message, so a single call reports every problem.
// @return Status - OK when the VOC path is a directory and num_parallel_workers is greater than 0;
//     otherwise kMDUnexpectedError carrying the collected messages
Status VOCOp::Builder::SanityCheck() {
  Path dir(builder_dir_);
  std::string err_msg;
  if (!dir.IsDirectory()) {
    err_msg += "Invalid parameter, VOC path is invalid or not set, path: " + builder_dir_ + ".\n";
  }
  if (builder_num_workers_ <= 0) {
    err_msg += "Invalid parameter, num_parallel_workers must be greater than 0, but got " +
               std::to_string(builder_num_workers_) + ".\n";
  }
  if (err_msg.empty()) {
    return Status::OK();
  }
  return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode, const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata) std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
@ -161,6 +111,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} }
Status VOCOp::ParseImageIds() { Status VOCOp::ParseImageIds() {
if (!image_ids_.empty()) return Status::OK();
std::string image_sets_file; std::string image_sets_file;
if (task_type_ == TaskType::Segmentation) { if (task_type_ == TaskType::Segmentation) {
image_sets_file = folder_path_ + std::string(kImageSetsSegmentation) + usage_ + std::string(kImageSetsExtension); image_sets_file = folder_path_ + std::string(kImageSetsSegmentation) + usage_ + std::string(kImageSetsExtension);
@ -357,51 +308,20 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
return Status::OK(); return Status::OK();
} }
Status VOCOp::CountTotalRows(const std::string &dir, const std::string &task_type, const std::string &task_mode, Status VOCOp::CountTotalRows(int64_t *count) {
const std::map<std::string, int32_t> &input_class_indexing, int64_t *count) { switch (task_type_) {
if (task_type == "Detection") { case TaskType::Detection:
std::shared_ptr<VOCOp> op; RETURN_IF_NOT_OK(ParseImageIds());
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(ParseAnnotationIds());
Builder().SetDir(dir).SetTask(task_type).SetUsage(task_mode).SetClassIndex(input_class_indexing).Build(&op)); break;
RETURN_IF_NOT_OK(op->ParseImageIds()); case TaskType::Segmentation:
RETURN_IF_NOT_OK(op->ParseAnnotationIds()); RETURN_IF_NOT_OK(ParseImageIds());
*count = static_cast<int64_t>(op->image_ids_.size()); break;
} else if (task_type == "Segmentation") {
std::shared_ptr<VOCOp> op;
RETURN_IF_NOT_OK(Builder().SetDir(dir).SetTask(task_type).SetUsage(task_mode).Build(&op));
RETURN_IF_NOT_OK(op->ParseImageIds());
*count = static_cast<int64_t>(op->image_ids_.size());
} }
*count = static_cast<int64_t>(image_ids_.size());
return Status::OK(); return Status::OK();
} }
#ifdef ENABLE_PYTHON
// Get the class name to index mapping of a VOC dataset.
// @param const std::string &dir - VOC dir path handed to the builder
// @param const std::string &task_type - task type handed to the builder
// @param const std::string &task_mode - handed to the builder as its usage
// @param const py::dict &dict - user supplied class index; when it is not empty it is returned as is
// @param std::map<std::string, int32_t> *output_class_indexing - receives the mapping
// @return Status - failure from building the op or parsing the dataset, otherwise OK
Status VOCOp::GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode,
                               const py::dict &dict, std::map<std::string, int32_t> *output_class_indexing) {
  std::map<std::string, int32_t> input_class_indexing;
  for (auto p : dict) {
    (void)input_class_indexing.insert(std::pair<std::string, int32_t>(py::reinterpret_borrow<py::str>(p.first),
                                                                      py::reinterpret_borrow<py::int_>(p.second)));
  }

  // A user supplied mapping wins; the local copy is not needed afterwards, so move it out.
  if (!input_class_indexing.empty()) {
    *output_class_indexing = std::move(input_class_indexing);
    return Status::OK();
  }

  // Nothing supplied: parse the dataset with a temporary op and copy out its label_index_.
  std::shared_ptr<VOCOp> op;
  RETURN_IF_NOT_OK(
    Builder().SetDir(dir).SetTask(task_type).SetUsage(task_mode).SetClassIndex(input_class_indexing).Build(&op));
  RETURN_IF_NOT_OK(op->ParseImageIds());
  RETURN_IF_NOT_OK(op->ParseAnnotationIds());
  // Bind by reference: iterating by value would copy every entry.
  for (const auto &label : op->label_index_) {
    (void)output_class_indexing->insert(std::make_pair(label.first, label.second));
  }
  return Status::OK();
}
#endif
Status VOCOp::ComputeColMap() { Status VOCOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
@ -420,12 +340,9 @@ Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<in
MS_LOG(ERROR) << "Class index only valid in \"Detection\" task."; MS_LOG(ERROR) << "Class index only valid in \"Detection\" task.";
RETURN_STATUS_UNEXPECTED("GetClassIndexing: Get Class Index failed in VOCOp."); RETURN_STATUS_UNEXPECTED("GetClassIndexing: Get Class Index failed in VOCOp.");
} }
std::shared_ptr<VOCOp> op; RETURN_IF_NOT_OK(ParseImageIds());
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(ParseAnnotationIds());
Builder().SetDir(folder_path_).SetTask("Detection").SetUsage(usage_).SetClassIndex(class_index_).Build(&op)); for (const auto &label : label_index_) {
RETURN_IF_NOT_OK(op->ParseImageIds());
RETURN_IF_NOT_OK(op->ParseAnnotationIds());
for (const auto label : op->label_index_) {
if (!class_index_.empty()) { if (!class_index_.empty()) {
(*output_class_indexing) (*output_class_indexing)
.emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first]))); .emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first])));

View File

@ -50,106 +50,6 @@ class VOCOp : public MappableLeafOp {
public: public:
enum class TaskType { Segmentation = 0, Detection = 1 }; enum class TaskType { Segmentation = 0, Detection = 1 };
// Builder for VOCOp: collects the settings through chained setters and creates the op in Build().
class Builder {
 public:
  // Constructor for Builder class of VOCOp
  Builder();

  // Destructor.
  ~Builder() = default;

  // Setter method.
  // @param const std::string & build_dir
  // @return Builder setter method returns reference to the builder.
  Builder &SetDir(const std::string &build_dir) {
    builder_dir_ = build_dir;
    return *this;
  }

  // Setter method.
  // @param const std::map<std::string, int32_t> &map - a class name to label map
  // @return Builder setter method returns reference to the builder.
  Builder &SetClassIndex(const std::map<std::string, int32_t> &map) {
    builder_labels_to_read_ = map;
    return *this;
  }

  // Setter method.
  // Only "Segmentation" and "Detection" are recognised; any other string leaves the current
  // task type unchanged.
  // @param const std::string &task_type
  // @return Builder setter method returns reference to the builder.
  Builder &SetTask(const std::string &task_type) {
    if (task_type == "Segmentation") {
      builder_task_type_ = TaskType::Segmentation;
    } else if (task_type == "Detection") {
      builder_task_type_ = TaskType::Detection;
    }
    return *this;
  }

  // Setter method.
  // @param const std::string &usage
  // @return Builder setter method returns reference to the builder.
  Builder &SetUsage(const std::string &usage) {
    builder_usage_ = usage;
    return *this;
  }

  // Setter method.
  // @param int32_t num_workers
  // @return Builder setter method returns reference to the builder.
  Builder &SetNumWorkers(int32_t num_workers) {
    builder_num_workers_ = num_workers;
    return *this;
  }

  // Setter method.
  // @param int32_t op_connector_size
  // @return Builder setter method returns reference to the builder.
  Builder &SetOpConnectorSize(int32_t op_connector_size) {
    builder_op_connector_size_ = op_connector_size;
    return *this;
  }

  // Setter method.
  // @param std::shared_ptr<SamplerRT> sampler
  // @return Builder setter method returns reference to the builder.
  Builder &SetSampler(std::shared_ptr<SamplerRT> sampler) {
    builder_sampler_ = std::move(sampler);
    return *this;
  }

  // Setter method.
  // @param bool do_decode
  // @return Builder setter method returns reference to the builder.
  Builder &SetDecode(bool do_decode) {
    builder_decode_ = do_decode;
    return *this;
  }

  // Check validity of input args
  // @return Status The status code returned
  Status SanityCheck();

  // The builder "Build" method creates the final object.
  // @param std::shared_ptr<VOCOp> *op - DatasetOp
  // @return Status The status code returned
  Status Build(std::shared_ptr<VOCOp> *op);

 private:
  bool builder_decode_;
  std::string builder_dir_;
  TaskType builder_task_type_;
  std::string builder_usage_;
  int32_t builder_num_workers_;
  int32_t builder_op_connector_size_;
  // No setter or constructor in this class assigns this member, so give it a defined value here.
  int32_t builder_rows_per_buffer_ = 0;
  std::shared_ptr<SamplerRT> builder_sampler_;
  std::unique_ptr<DataSchema> builder_schema_;
  std::map<std::string, int32_t> builder_labels_to_read_;
};
// Constructor // Constructor
// @param TaskType task_type - task type of VOC // @param TaskType task_type - task type of VOC
// @param std::string task_mode - task mode of VOC // @param std::string task_mode - task mode of VOC
@ -173,24 +73,8 @@ class VOCOp : public MappableLeafOp {
// @param show_all // @param show_all
void Print(std::ostream &out, bool show_all) const override; void Print(std::ostream &out, bool show_all) const override;
// @param const std::string &dir - VOC dir path
// @param const std::string &task_type - task type of reading voc job
// @param const std::string &task_mode - task mode of reading voc job
// @param const std::map<std::string, int32_t> input_class_indexing - input map of class index
// @param int64_t *count - output rows number of VOCDataset // @param int64_t *count - output rows number of VOCDataset
static Status CountTotalRows(const std::string &dir, const std::string &task_type, const std::string &task_mode, Status CountTotalRows(int64_t *count);
const std::map<std::string, int32_t> &input_class_indexing, int64_t *count);
#ifdef ENABLE_PYTHON
// @param const std::string &dir - VOC dir path
// @param const std::string &task_type - task type of reading voc job
// @param const std::string &task_mode - task mode of reading voc job
// @param const py::dict &dict - input dict of class index
// @param int64_t numSamples - samples number of VOCDataset
// @param std::map<std::string, int32_t> *output_class_indexing - output class index of VOCDataset
static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode,
const py::dict &dict, std::map<std::string, int32_t> *output_class_indexing);
#endif
// Op name getter // Op name getter
// @return Name of the current Op // @return Name of the current Op

View File

@ -156,7 +156,11 @@ Status CocoNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_g
return Status::OK(); return Status::OK();
} }
int64_t num_rows = 0, sample_size; int64_t num_rows = 0, sample_size;
RETURN_IF_NOT_OK(CocoOp::CountTotalRows(dataset_dir_, annotation_file_, task_, &num_rows)); std::vector<std::shared_ptr<DatasetOp>> ops;
RETURN_IF_NOT_OK(Build(&ops));
CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build CocoOp.");
auto op = std::dynamic_pointer_cast<CocoOp>(ops.front());
RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows));
std::shared_ptr<SamplerRT> sampler_rt = nullptr; std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
sample_size = sampler_rt->CalculateNumSamples(num_rows); sample_size = sampler_rt->CalculateNumSamples(num_rows);

View File

@ -118,8 +118,11 @@ Status ManifestNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &si
return Status::OK(); return Status::OK();
} }
int64_t num_rows, sample_size; int64_t num_rows, sample_size;
int64_t num_classes; // dummy variable std::vector<std::shared_ptr<DatasetOp>> ops;
RETURN_IF_NOT_OK(ManifestOp::CountTotalRows(dataset_file_, class_index_, usage_, &num_rows, &num_classes)); RETURN_IF_NOT_OK(Build(&ops));
CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build op.");
auto op = std::dynamic_pointer_cast<ManifestOp>(ops.front());
op->CountTotalRows(&num_rows);
std::shared_ptr<SamplerRT> sampler_rt = nullptr; std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
sample_size = sampler_rt->CalculateNumSamples(num_rows); sample_size = sampler_rt->CalculateNumSamples(num_rows);

View File

@ -143,7 +143,11 @@ Status VOCNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_ge
return Status::OK(); return Status::OK();
} }
int64_t num_rows = 0, sample_size; int64_t num_rows = 0, sample_size;
RETURN_IF_NOT_OK(VOCOp::CountTotalRows(dataset_dir_, task_, usage_, class_index_, &num_rows)); std::vector<std::shared_ptr<DatasetOp>> ops;
RETURN_IF_NOT_OK(Build(&ops));
CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build VocOp.");
auto op = std::dynamic_pointer_cast<VOCOp>(ops.front());
RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows));
std::shared_ptr<SamplerRT> sampler_rt = nullptr; std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
sample_size = sampler_rt->CalculateNumSamples(num_rows); sample_size = sampler_rt->CalculateNumSamples(num_rows);

View File

@ -1,239 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
// Forward declaration of a free-standing Build() helper that takes a list of ops and
// returns an ExecutionTree; presumably defined in another test source file (TODO confirm).
// NOTE(review): none of the tests in this file call it -- they go through
// CocoOp::Builder::Build() instead -- so this declaration looks like a removal candidate.
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
// Fixture for the CocoOp unit tests. It declares no members of its own; everything the
// tests use (e.g. datasets_root_path_) is inherited from UT::DatasetOpTesting.
class MindDataTestCocoOp : public UT::DatasetOpTesting {};
// Drains a one-node execution tree built around a CocoOp configured for the "Detection"
// task (testCOCO/train images + annotations/train.json) and expects exactly 6 rows.
TEST_F(MindDataTestCocoOp, TestCocoDetection) {
  // The tree starts out empty; the CocoOp below becomes its only node and its root.
  std::shared_ptr<ExecutionTree> tree = std::make_shared<ExecutionTree>();

  std::string image_dir = datasets_root_path_ + "/testCOCO/train/";
  std::string annotation_json = datasets_root_path_ + "/testCOCO/annotations/train.json";
  std::string task_name = "Detection";

  std::shared_ptr<CocoOp> coco_op;
  CocoOp::Builder coco_builder;
  Status status = coco_builder.SetDir(image_dir).SetFile(annotation_json).SetTask(task_name).Build(&coco_op);
  ASSERT_TRUE(status.IsOk());

  status = tree->AssociateNode(coco_op);
  ASSERT_TRUE(status.IsOk());
  status = tree->AssignRoot(coco_op);
  ASSERT_TRUE(status.IsOk());

  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
  status = tree->Prepare();
  ASSERT_TRUE(status.IsOk());
  status = tree->Launch();
  ASSERT_TRUE(status.IsOk());

  // Fetch rows one at a time; the loop ends at the first empty row.
  DatasetIterator row_iter(tree);
  TensorRow row;
  int rows_seen = 0;
  while (true) {
    status = row_iter.FetchNextTensorRow(&row);
    ASSERT_TRUE(status.IsOk());
    if (row.empty()) {
      break;
    }
    MS_LOG(DEBUG) << "Row display for row #: " << rows_seen << ".";

    // Log each element of the row: the element itself, then what it points to.
    for (int col = 0; col < row.size(); ++col) {
      std::ostringstream dump;
      dump << "(" << row[col] << "): " << *row[col] << '\n';
      MS_LOG(DEBUG) << "Tensor print: " << dump.str() << ".";
    }
    ++rows_seen;
  }

  ASSERT_EQ(rows_seen, 6);
}
// Drains a one-node execution tree built around a CocoOp configured for the "Stuff"
// task (testCOCO/train images + annotations/train.json) and expects exactly 6 rows.
TEST_F(MindDataTestCocoOp, TestCocoStuff) {
  // The tree starts out empty; the CocoOp below becomes its only node and its root.
  std::shared_ptr<ExecutionTree> tree = std::make_shared<ExecutionTree>();

  std::string image_dir = datasets_root_path_ + "/testCOCO/train/";
  std::string annotation_json = datasets_root_path_ + "/testCOCO/annotations/train.json";
  std::string task_name = "Stuff";

  std::shared_ptr<CocoOp> coco_op;
  CocoOp::Builder coco_builder;
  Status status = coco_builder.SetDir(image_dir).SetFile(annotation_json).SetTask(task_name).Build(&coco_op);
  ASSERT_TRUE(status.IsOk());

  status = tree->AssociateNode(coco_op);
  ASSERT_TRUE(status.IsOk());
  status = tree->AssignRoot(coco_op);
  ASSERT_TRUE(status.IsOk());

  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
  status = tree->Prepare();
  ASSERT_TRUE(status.IsOk());
  status = tree->Launch();
  ASSERT_TRUE(status.IsOk());

  // Fetch rows one at a time; the loop ends at the first empty row.
  DatasetIterator row_iter(tree);
  TensorRow row;
  int rows_seen = 0;
  while (true) {
    status = row_iter.FetchNextTensorRow(&row);
    ASSERT_TRUE(status.IsOk());
    if (row.empty()) {
      break;
    }
    MS_LOG(DEBUG) << "Row display for row #: " << rows_seen << ".";

    // Log each element of the row: the element itself, then what it points to.
    for (int col = 0; col < row.size(); ++col) {
      std::ostringstream dump;
      dump << "(" << row[col] << "): " << *row[col] << '\n';
      MS_LOG(DEBUG) << "Tensor print: " << dump.str() << ".";
    }
    ++rows_seen;
  }

  ASSERT_EQ(rows_seen, 6);
}
// Drains a one-node execution tree built around a CocoOp configured for the "Keypoint"
// task (testCOCO/train images + annotations/key_point.json) and expects exactly 2 rows.
TEST_F(MindDataTestCocoOp, TestCocoKeypoint) {
  // The tree starts out empty; the CocoOp below becomes its only node and its root.
  std::shared_ptr<ExecutionTree> tree = std::make_shared<ExecutionTree>();

  std::string image_dir = datasets_root_path_ + "/testCOCO/train/";
  std::string annotation_json = datasets_root_path_ + "/testCOCO/annotations/key_point.json";
  std::string task_name = "Keypoint";

  std::shared_ptr<CocoOp> coco_op;
  CocoOp::Builder coco_builder;
  Status status = coco_builder.SetDir(image_dir).SetFile(annotation_json).SetTask(task_name).Build(&coco_op);
  ASSERT_TRUE(status.IsOk());

  status = tree->AssociateNode(coco_op);
  ASSERT_TRUE(status.IsOk());
  status = tree->AssignRoot(coco_op);
  ASSERT_TRUE(status.IsOk());

  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
  status = tree->Prepare();
  ASSERT_TRUE(status.IsOk());
  status = tree->Launch();
  ASSERT_TRUE(status.IsOk());

  // Fetch rows one at a time; the loop ends at the first empty row.
  DatasetIterator row_iter(tree);
  TensorRow row;
  int rows_seen = 0;
  while (true) {
    status = row_iter.FetchNextTensorRow(&row);
    ASSERT_TRUE(status.IsOk());
    if (row.empty()) {
      break;
    }
    MS_LOG(DEBUG) << "Row display for row #: " << rows_seen << ".";

    // Log each element of the row: the element itself, then what it points to.
    for (int col = 0; col < row.size(); ++col) {
      std::ostringstream dump;
      dump << "(" << row[col] << "): " << *row[col] << '\n';
      MS_LOG(DEBUG) << "Tensor print: " << dump.str() << ".";
    }
    ++rows_seen;
  }

  ASSERT_EQ(rows_seen, 2);
}
// Drains a one-node execution tree built around a CocoOp configured for the "Panoptic"
// task (testCOCO/train images + annotations/panoptic.json) and expects exactly 2 rows.
TEST_F(MindDataTestCocoOp, TestCocoPanoptic) {
  // The tree starts out empty; the CocoOp below becomes its only node and its root.
  std::shared_ptr<ExecutionTree> tree = std::make_shared<ExecutionTree>();

  std::string image_dir = datasets_root_path_ + "/testCOCO/train/";
  std::string annotation_json = datasets_root_path_ + "/testCOCO/annotations/panoptic.json";
  std::string task_name = "Panoptic";

  std::shared_ptr<CocoOp> coco_op;
  CocoOp::Builder coco_builder;
  Status status = coco_builder.SetDir(image_dir).SetFile(annotation_json).SetTask(task_name).Build(&coco_op);
  ASSERT_TRUE(status.IsOk());

  status = tree->AssociateNode(coco_op);
  ASSERT_TRUE(status.IsOk());
  status = tree->AssignRoot(coco_op);
  ASSERT_TRUE(status.IsOk());

  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
  status = tree->Prepare();
  ASSERT_TRUE(status.IsOk());
  status = tree->Launch();
  ASSERT_TRUE(status.IsOk());

  // Fetch rows one at a time; the loop ends at the first empty row.
  DatasetIterator row_iter(tree);
  TensorRow row;
  int rows_seen = 0;
  while (true) {
    status = row_iter.FetchNextTensorRow(&row);
    ASSERT_TRUE(status.IsOk());
    if (row.empty()) {
      break;
    }
    MS_LOG(DEBUG) << "Row display for row #: " << rows_seen << ".";

    // Log each element of the row: the element itself, then what it points to.
    for (int col = 0; col < row.size(); ++col) {
      std::ostringstream dump;
      dump << "(" << row[col] << "): " << *row[col] << '\n';
      MS_LOG(DEBUG) << "Tensor print: " << dump.str() << ".";
    }
    ++rows_seen;
  }

  ASSERT_EQ(rows_seen, 2);
}

View File

@ -37,24 +37,19 @@ using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType; using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR; using mindspore::MsLogLevel::ERROR;
// std::shared_ptr<RepeatOp> Repeat(int repeatCnt);
// std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file, std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file,
std::string usage = "train", std::shared_ptr<SamplerRT> sampler = nullptr, std::string usage = "train", std::shared_ptr<SamplerRT> sampler = nullptr,
std::map<std::string, int32_t> map = {}, bool decode = false) { std::map<std::string, int32_t> map = {}, bool decode = false) {
std::shared_ptr<ManifestOp> so; if (sampler == nullptr) {
ManifestOp::Builder builder; const int64_t num_samples = 0;
Status rc = builder.SetNumWorkers(num_works) const int64_t start_index = 0;
.SetManifestFile(file) sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
.SetOpConnectorSize(conns) }
.SetSampler(std::move(sampler)) auto schema = std::make_unique<DataSchema>();
.SetClassIndex(map) schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
.SetDecode(decode) schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1));
.SetUsage(usage) return std::make_shared<ManifestOp>(num_works, file, conns, decode, map, std::move(schema), std::move(sampler),
.Build(&so); usage);
return so;
} }
class MindDataTestManifest : public UT::DatasetOpTesting { class MindDataTestManifest : public UT::DatasetOpTesting {

View File

@ -70,7 +70,6 @@ TEST_F(MindDataTestMnistSampler, TestSequentialMnistWithRepeat) {
while (row.size() != 0) { while (row.size() != 0) {
auto image = row["image"]; auto image = row["image"];
auto label = row["label"]; auto label = row["label"];
// EXPECT_EQ(label, res[i % 10]);
MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
std::shared_ptr<Tensor> de_label; std::shared_ptr<Tensor> de_label;
ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label)); ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label));

View File

@ -22,6 +22,7 @@
#include "minddata/dataset/core/client.h" #include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/datasetops/source/voc_op.h" #include "minddata/dataset/engine/datasetops/source/voc_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/include/dataset/datasets.h"
#include "minddata/dataset/util/status.h" #include "minddata/dataset/util/status.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "utils/log_adapter.h" #include "utils/log_adapter.h"
@ -41,156 +42,74 @@ class MindDataTestVOCOp : public UT::DatasetOpTesting {
}; };
TEST_F(MindDataTestVOCOp, TestVOCDetection) { TEST_F(MindDataTestVOCOp, TestVOCDetection) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
std::string dataset_path; std::string dataset_path;
dataset_path = datasets_root_path_ + "/testVOC2012"; dataset_path = datasets_root_path_ + "/testVOC2012";
std::shared_ptr<Dataset> ds =
std::string task_type("Detection"); VOC(dataset_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 0));
std::string task_mode("train"); EXPECT_NE(ds, nullptr);
std::shared_ptr<VOCOp> my_voc_op; std::shared_ptr<Iterator> iter = ds->CreateIterator();
VOCOp::Builder builder; EXPECT_NE(iter, nullptr);
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op); std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_TRUE(rc.IsOk()); ASSERT_OK(iter->GetNextRow(&row));
rc = my_tree->AssociateNode(my_voc_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssignRoot(my_voc_op);
ASSERT_TRUE(rc.IsOk());
MS_LOG(DEBUG) << "Launch tree and begin iteration.";
rc = my_tree->Prepare();
ASSERT_TRUE(rc.IsOk());
rc = my_tree->Launch();
ASSERT_TRUE(rc.IsOk());
// Start the loop of reading tensors from our pipeline
DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
int row_count = 0; int row_count = 0;
while (!tensor_list.empty()) { while (row.size() != 0) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; auto image = row["image"];
auto label = row["label"];
// Display the tensor by calling the printer on it MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
for (int i = 0; i < tensor_list.size(); i++) { MS_LOG(INFO) << "Tensor label shape: " << label.Shape();
std::ostringstream ss; ASSERT_OK(iter->GetNextRow(&row));
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
}
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
row_count++; row_count++;
} }
ASSERT_EQ(row_count, 9); ASSERT_EQ(row_count, 9);
iter->Stop();
} }
TEST_F(MindDataTestVOCOp, TestVOCSegmentation) { TEST_F(MindDataTestVOCOp, TestVOCSegmentation) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
std::string dataset_path; std::string dataset_path;
dataset_path = datasets_root_path_ + "/testVOC2012"; dataset_path = datasets_root_path_ + "/testVOC2012";
std::shared_ptr<Dataset> ds =
std::string task_type("Segmentation"); VOC(dataset_path, "Segmentation", "train", {}, false, std::make_shared<SequentialSampler>(0, 0));
std::string task_mode("train"); EXPECT_NE(ds, nullptr);
std::shared_ptr<VOCOp> my_voc_op; std::shared_ptr<Iterator> iter = ds->CreateIterator();
VOCOp::Builder builder; EXPECT_NE(iter, nullptr);
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op); std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_TRUE(rc.IsOk()); ASSERT_OK(iter->GetNextRow(&row));
rc = my_tree->AssociateNode(my_voc_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssignRoot(my_voc_op);
ASSERT_TRUE(rc.IsOk());
MS_LOG(DEBUG) << "Launch tree and begin iteration.";
rc = my_tree->Prepare();
ASSERT_TRUE(rc.IsOk());
rc = my_tree->Launch();
ASSERT_TRUE(rc.IsOk());
// Start the loop of reading tensors from our pipeline
DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
int row_count = 0; int row_count = 0;
while (!tensor_list.empty()) { while (!row.empty()) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; auto image = row["image"];
auto target = row["target"];
// Display the tensor by calling the printer on it MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
for (int i = 0; i < tensor_list.size(); i++) { MS_LOG(INFO) << "Tensor target shape: " << target.Shape();
std::ostringstream ss; ASSERT_OK(iter->GetNextRow(&row));
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
}
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
row_count++; row_count++;
} }
ASSERT_EQ(row_count, 10); ASSERT_EQ(row_count, 10);
iter->Stop();
} }
TEST_F(MindDataTestVOCOp, TestVOCClassIndex) { TEST_F(MindDataTestVOCOp, TestVOCClassIndex) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
std::string dataset_path; std::string dataset_path;
dataset_path = datasets_root_path_ + "/testVOC2012"; dataset_path = datasets_root_path_ + "/testVOC2012";
std::string task_type("Detection");
std::string task_mode("train");
std::map<std::string, int32_t> class_index; std::map<std::string, int32_t> class_index;
class_index["car"] = 0; class_index["car"] = 0;
class_index["cat"] = 1; class_index["cat"] = 1;
class_index["train"] = 5; class_index["train"] = 5;
std::shared_ptr<VOCOp> my_voc_op; std::shared_ptr<Dataset> ds =
VOCOp::Builder builder; VOC(dataset_path, "Detection", "train", class_index, false, std::make_shared<SequentialSampler>(0, 0));
Status rc = EXPECT_NE(ds, nullptr);
builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).SetClassIndex(class_index).Build(&my_voc_op); std::shared_ptr<Iterator> iter = ds->CreateIterator();
ASSERT_TRUE(rc.IsOk()); EXPECT_NE(iter, nullptr);
std::unordered_map<std::string, mindspore::MSTensor> row;
rc = my_tree->AssociateNode(my_voc_op); ASSERT_OK(iter->GetNextRow(&row));
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssignRoot(my_voc_op);
ASSERT_TRUE(rc.IsOk());
MS_LOG(DEBUG) << "Launch tree and begin iteration.";
rc = my_tree->Prepare();
ASSERT_TRUE(rc.IsOk());
rc = my_tree->Launch();
ASSERT_TRUE(rc.IsOk());
// Start the loop of reading tensors from our pipeline
DatasetIterator di(my_tree);
TensorRow tensor_list;
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
int row_count = 0; int row_count = 0;
while (!tensor_list.empty()) { while (!row.empty()) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; auto image = row["image"];
auto label = row["label"];
// Display the tensor by calling the printer on it MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
for (int i = 0; i < tensor_list.size(); i++) { MS_LOG(INFO) << "Tensor label shape: " << label.Shape();
std::ostringstream ss; ASSERT_OK(iter->GetNextRow(&row));
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
}
rc = di.FetchNextTensorRow(&tensor_list);
ASSERT_TRUE(rc.IsOk());
row_count++; row_count++;
} }
ASSERT_EQ(row_count, 6); ASSERT_EQ(row_count, 6);
iter->Stop();
} }