!19362 [assistant][ops] Add new dataset operator Cityscapes.

Merge pull request !19362 from Rainfor/cityscapes
i-robot 2021-09-08 09:14:43 +00:00 committed by Gitee
commit 938324b397
32 changed files with 1846 additions and 1 deletion

View File

@@ -91,6 +91,7 @@
#include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/cityscapes_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/clue_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
@@ -907,6 +908,35 @@ Cifar100Dataset::Cifar100Dataset(const std::vector<char> &dataset_dir, const std
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
CityscapesDataset::CityscapesDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::vector<char> &quality_mode, const std::vector<char> &task, bool decode,
const std::shared_ptr<Sampler> &sampler,
const std::shared_ptr<DatasetCache> &cache) {
auto sampler_obj = sampler ? sampler->Parse() : nullptr;
auto ds = std::make_shared<CityscapesNode>(CharToString(dataset_dir), CharToString(usage), CharToString(quality_mode),
CharToString(task), decode, sampler_obj, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
CityscapesDataset::CityscapesDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::vector<char> &quality_mode, const std::vector<char> &task, bool decode,
const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache) {
auto sampler_obj = sampler ? sampler->Parse() : nullptr;
auto ds = std::make_shared<CityscapesNode>(CharToString(dataset_dir), CharToString(usage), CharToString(quality_mode),
CharToString(task), decode, sampler_obj, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
CityscapesDataset::CityscapesDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::vector<char> &quality_mode, const std::vector<char> &task, bool decode,
const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache) {
auto sampler_obj = sampler.get().Parse();
auto ds = std::make_shared<CityscapesNode>(CharToString(dataset_dir), CharToString(usage), CharToString(quality_mode),
CharToString(task), decode, sampler_obj, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
CLUEDataset::CLUEDataset(const std::vector<std::vector<char>> &dataset_files, const std::vector<char> &task,
const std::vector<char> &usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards,
int32_t shard_id, const std::shared_ptr<DatasetCache> &cache) {

View File

@@ -28,6 +28,7 @@
#include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/cityscapes_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/clue_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
@@ -85,6 +86,18 @@ PYBIND_REGISTER(Cifar100Node, 2, ([](const py::module *m) {
}));
}));
PYBIND_REGISTER(CityscapesNode, 2, ([](const py::module *m) {
(void)py::class_<CityscapesNode, DatasetNode, std::shared_ptr<CityscapesNode>>(
*m, "CityscapesNode", "to create a CityscapesNode")
.def(py::init([](std::string dataset_dir, std::string usage, std::string quality_mode,
std::string task, bool decode, py::handle sampler) {
auto cityscapes = std::make_shared<CityscapesNode>(dataset_dir, usage, quality_mode, task, decode,
toSamplerObj(sampler), nullptr);
THROW_IF_ERROR(cityscapes->ValidateParams());
return cityscapes;
}));
}));
PYBIND_REGISTER(CLUENode, 2, ([](const py::module *m) {
(void)py::class_<CLUENode, DatasetNode, std::shared_ptr<CLUENode>>(*m, "CLUENode",
"to create a CLUENode")

View File

@@ -16,6 +16,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
album_op.cc
mappable_leaf_op.cc
nonmappable_leaf_op.cc
cityscapes_op.cc
flickr_op.cc
)

View File

@@ -0,0 +1,283 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/datasetops/source/cityscapes_op.h"
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <set>
#include <utility>
#include "debug/common.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/db_connector.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace dataset {
constexpr char taskSuffix[] = "polygon";
CityscapesOp::CityscapesOp(int32_t num_workers, const std::string &dataset_dir, const std::string &usage,
const std::string &quality_mode, const std::string &task, bool decode, int32_t queue_size,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
dataset_dir_(dataset_dir),
usage_(usage),
quality_mode_(quality_mode),
task_(task),
decode_(decode),
data_schema_(std::move(data_schema)) {
io_block_queues_.Init(num_workers_, queue_size);
}
Status CityscapesOp::LaunchThreadsAndInitOp() {
if (tree_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set.");
}
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
RETURN_IF_NOT_OK(
tree_->LaunchWorkers(num_workers_, std::bind(&CityscapesOp::WorkerEntry, this, std::placeholders::_1), "", id()));
TaskManager::FindMe()->Post();
// The order of the following 3 functions must not be changed!
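// ParseCityscapesData() fills image_task_pairs_, CountDatasetInfo() derives num_rows_ from it,
// and InitSampler() requires num_rows_ to already be set.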
RETURN_IF_NOT_OK(ParseCityscapesData()); // Parse Cityscapes data and get num rows, blocking
RETURN_IF_NOT_OK(CountDatasetInfo()); // Count the total rows
RETURN_IF_NOT_OK(InitSampler()); // Pass numRows to Sampler
return Status::OK();
}
// Load 1 TensorRow (image, task) using 1 image-task pair. 1 function call produces 1 TensorRow
Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
std::pair<std::string, std::string> data = image_task_pairs_[static_cast<size_t>(row_id)];
std::shared_ptr<Tensor> image;
std::shared_ptr<Tensor> task;
RETURN_IF_NOT_OK(Tensor::CreateFromFile(data.first, &image));
if (task_ != taskSuffix) {
RETURN_IF_NOT_OK(Tensor::CreateFromFile(data.second, &task));
} else {
std::ifstream file_handle(data.second);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + data.second);
}
std::string contents((std::istreambuf_iterator<char>(file_handle)), std::istreambuf_iterator<char>());
nlohmann::json contents_js = nlohmann::json::parse(contents);
Status rc = Tensor::CreateScalar(contents_js.dump(), &task);
if (rc.IsError()) {
file_handle.close();
return rc;
}
file_handle.close();
}
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
RETURN_STATUS_UNEXPECTED(err);
}
if (task_ != taskSuffix) {
Status rc_t = Decode(task, &task);
if (rc_t.IsError()) {
std::string err_t = "Invalid data, failed to decode image: " + data.second;
RETURN_STATUS_UNEXPECTED(err_t);
}
}
}
(*trow) = TensorRow(row_id, {std::move(image), std::move(task)});
trow->setPath({data.first, data.second});
return Status::OK();
}
void CityscapesOp::Print(std::ostream &out, bool show_all) const {
if (!show_all) {
// Call the super class for displaying any common 1-liner info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal 1-liner info for this op
out << "\n";
} else {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nNumber of rows:" << num_rows_ << "\nCityscapes DatasetDir: " << dataset_dir_ << "\nTask: " << task_
<< "\nQualityMode: " << quality_mode_ << "\nUsage: " << usage_ << "\nDecode: " << (decode_ ? "yes" : "no")
<< "\n\n";
}
}
Status CityscapesOp::ParseCityscapesData() {
auto real_dataset_dir = Common::GetRealPath(dataset_dir_);
if (!real_dataset_dir.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
}
Path dataset_dir(real_dataset_dir.value());
std::string real_quality_mode = quality_mode_ == "fine" ? "gtFine" : "gtCoarse";
if (usage_ == "all" && quality_mode_ == "fine") {
std::vector<std::string> all_usage_fine = {"train", "test", "val"};
for (auto item : all_usage_fine) {
std::string images_dir_fine = (dataset_dir / "leftImg8bit" / item).toString();
std::string task_dir_fine = (dataset_dir / real_quality_mode / item).toString();
RETURN_IF_NOT_OK(GetCityscapesDataByUsage(images_dir_fine, task_dir_fine, real_quality_mode));
}
} else if (usage_ == "all" && quality_mode_ == "coarse") {
std::vector<std::string> all_usage_coarse = {"train", "train_extra", "val"};
for (auto item : all_usage_coarse) {
std::string images_dir_coarse = (dataset_dir / "leftImg8bit" / item).toString();
std::string task_dir_coarse = (dataset_dir / real_quality_mode / item).toString();
RETURN_IF_NOT_OK(GetCityscapesDataByUsage(images_dir_coarse, task_dir_coarse, real_quality_mode));
}
} else {
std::string images_dir = (dataset_dir / "leftImg8bit" / usage_).toString();
std::string task_dir = (dataset_dir / real_quality_mode / usage_).toString();
RETURN_IF_NOT_OK(GetCityscapesDataByUsage(images_dir, task_dir, real_quality_mode));
}
return Status::OK();
}
Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, const std::string &task_dir,
const std::string &real_quality_mode) {
const std::string kExtension = ".png";
std::string img_file_name;
std::map<std::string, std::string> image_task_map_;
Path images_dir_p(images_dir);
if (!images_dir_p.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + images_dir_p.toString() + " is an invalid directory path.");
}
Path task_dir_p(task_dir);
if (!task_dir_p.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + task_dir_p.toString() + " is an invalid directory path.");
}
std::shared_ptr<Path::DirIterator> d_it = Path::DirIterator::OpenDirectory(&images_dir_p);
if (d_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + images_dir_p.toString());
}
while (d_it->HasNext()) {
try {
Path city_dir = d_it->Next();
if (!city_dir.IsDirectory()) {
continue;
}
Path img_city_dir = images_dir_p / city_dir.Basename();
Path task_city_dir = task_dir_p / city_dir.Basename();
std::shared_ptr<Path::DirIterator> img_city_it = Path::DirIterator::OpenDirectory(&img_city_dir);
if (img_city_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + img_city_dir.toString());
}
while (img_city_it->HasNext()) {
Path img_file = img_city_it->Next();
if (img_file.Extension() != kExtension) {
continue;
}
Path image_file_path = img_city_dir / img_file.Basename();
img_file_name = img_file.Basename();
Path task_file_path = task_city_dir / (img_file_name.substr(0, img_file_name.find("_leftImg8bit")) + "_" +
GetTaskSuffix(task_, real_quality_mode));
if (!task_file_path.Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, " + task_file_path.toString() + " not found.");
}
image_task_map_[image_file_path.toString()] = task_file_path.toString();
}
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset: " + dataset_dir_);
}
}
for (auto item : image_task_map_) {
image_task_pairs_.emplace_back(std::make_pair(item.first, item.second));
}
return Status::OK();
}
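// Maps task and quality mode to the annotation file suffix; e.g. for task "semantic" in fine mode,
// aachen_000000_000019_leftImg8bit.png pairs with aachen_000000_000019_gtFine_labelIds.png.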
std::string CityscapesOp::GetTaskSuffix(const std::string &task, const std::string &real_quality_mode) {
std::string task_suffix;
if (task == "instance") {
task_suffix = real_quality_mode + "_instanceIds.png";
} else if (task == "semantic") {
task_suffix = real_quality_mode + "_labelIds.png";
} else if (task == "color") {
task_suffix = real_quality_mode + "_color.png";
} else {
task_suffix = real_quality_mode + "_polygons.json";
}
return task_suffix;
}
Status CityscapesOp::CountDatasetInfo() {
num_rows_ = static_cast<int64_t>(image_task_pairs_.size());
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, no valid data matching the dataset API CityscapesDataset. Please check file path or dataset API.");
}
return Status::OK();
}
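// Static helper; CityscapesNode::GetDatasetSize() uses it to size the dataset without launching a pipeline.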
Status CityscapesOp::CountTotalRows(const std::string &dir, const std::string &usage, const std::string &quality_mode,
const std::string &task, int64_t *count) {
// the logic of counting the number of samples is copied from ParseCityscapesData()
RETURN_UNEXPECTED_IF_NULL(count);
*count = 0;
const int64_t num_samples = 0;
const int64_t start_index = 0;
auto new_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
// build a new unique schema object
auto new_schema = std::make_unique<DataSchema>();
RETURN_IF_NOT_OK(new_schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
TensorShape scalar = TensorShape::CreateScalar();
if (task == "polygon") {
RETURN_IF_NOT_OK(
new_schema->AddColumn(ColDescriptor("task", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar)));
} else {
RETURN_IF_NOT_OK(
new_schema->AddColumn(ColDescriptor("task", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 0, &scalar)));
}
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
int32_t num_workers = cfg->num_parallel_workers();
int32_t op_connect_size = cfg->op_connector_size();
std::shared_ptr<CityscapesOp> op = std::make_shared<CityscapesOp>(
num_workers, dir, usage, quality_mode, task, false, op_connect_size, std::move(new_schema), std::move(new_sampler));
RETURN_IF_NOT_OK(op->ParseCityscapesData());
*count = static_cast<int64_t>(op->image_task_pairs_.size());
return Status::OK();
}
Status CityscapesOp::ComputeColMap() {
// Set the column name map (base class field)
if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->Column(i).Name()] = i;
}
} else {
MS_LOG(WARNING) << "Column name map is already set!";
}
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

View File

@@ -0,0 +1,131 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CITYSCAPES_OP_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CITYSCAPES_OP_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/datasetops/parallel_op.h"
#include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/queue.h"
#include "minddata/dataset/util/services.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/wait_post.h"
namespace mindspore {
namespace dataset {
class CityscapesOp : public MappableLeafOp {
public:
/// \brief Constructor.
/// \param[in] int32_t num_workers - number of workers reading images in parallel.
/// \param[in] std::string dataset_dir - directory of the Cityscapes dataset.
/// \param[in] std::string usage - the type of dataset. Acceptable usages include "train", "test", "val" or "all" if
/// quality_mode is "fine" otherwise "train", "train_extra", "val" or "all".
/// \param[in] std::string quality_mode - the quality mode of processed image. Acceptable quality_modes include
/// "fine" or "coarse".
/// \param[in] std::string task - the type of task which is used to select output data. Acceptable tasks include
/// "instance", "semantic", "polygon" or "color".
/// \param[in] bool decode - decode the images after reading.
/// \param[in] int32_t queue_size - connector queue size.
/// \param[in] DataSchema data_schema - the schema of each column in output data.
/// \param[in] std::shared_ptr<SamplerRT> sampler - sampler tells CityscapesOp what to read.
CityscapesOp(int32_t num_workers, const std::string &dataset_dir, const std::string &usage,
const std::string &quality_mode, const std::string &task, bool decode, int32_t queue_size,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
/// \brief Destructor.
~CityscapesOp() = default;
/// \brief A print method typically used for debugging.
/// \param[out] out - The output stream to write to.
/// \param[in] show_all - Whether to show all information.
void Print(std::ostream &out, bool show_all) const override;
/// \brief Function to count the number of samples in the Cityscapes dataset.
/// \param[in] dir - path to the Cityscapes directory.
/// \param[in] usage - the type of dataset. Acceptable usages include "train", "test", "val" or "all" if
/// quality_mode is "fine" otherwise "train", "train_extra", "val" or "all".
/// \param[in] quality_mode - the quality mode of processed image. Acceptable quality_modes include
/// "fine" or "coarse".
/// \param[in] task - the type of task which is used to select output data. Acceptable tasks include
/// "instance", "semantic", "polygon" or "color".
/// \param[out] count - output arg that will hold the actual dataset size.
/// \return Status - The status code returned.
static Status CountTotalRows(const std::string &dir, const std::string &usage, const std::string &quality_mode,
const std::string &task, int64_t *count);
/// \brief Op name getter.
/// \return Name of the current Op.
std::string Name() const override { return "CityscapesOp"; }
private:
/// \brief Load a tensor row according to an image-task pair.
/// \param[in] row_id_type index - index of the row to load.
/// \param[out] TensorRow trow - image & task read into this tensor row.
/// \return Status - The status code returned.
Status LoadTensorRow(row_id_type index, TensorRow *trow) override;
/// \brief Initialize the op and launch worker threads; called before rows are loaded.
/// \return Status - The status code returned.
Status LaunchThreadsAndInitOp() override;
/// \brief Parse Cityscapes data.
/// \return Status - The status code returned.
Status ParseCityscapesData();
/// \brief Get Cityscapes data by usage.
/// \param[in] images_dir - path to the images in the dataset.
/// \param[in] task_dir - path to the given task file.
/// \param[in] real_quality_mode - the real quality mode of image in dataset.
/// \return Status - The status code returned.
Status GetCityscapesDataByUsage(const std::string &images_dir, const std::string &task_dir,
const std::string &real_quality_mode);
/// \brief Count the total number of rows from the parsed image-task pairs.
/// \return Status - The status code returned.
Status CountDatasetInfo();
/// \brief Private function for computing the assignment of the column name map.
/// \return Status - The status code returned.
Status ComputeColMap() override;
/// \brief Private function for getting the task suffix.
/// \param[in] task - the type of task which is used to select output data.
/// \param[in] real_quality_mode - the real quality mode of image in dataset.
/// \return std::string - the suffix of task file.
std::string GetTaskSuffix(const std::string &task, const std::string &real_quality_mode);
std::string dataset_dir_;
std::string usage_;
std::string quality_mode_;
std::string task_;
bool decode_;
std::unique_ptr<DataSchema> data_schema_;
std::vector<std::pair<std::string, std::string>> image_task_pairs_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CITYSCAPES_OP_H_

View File

@@ -78,6 +78,7 @@ constexpr char kAlbumNode[] = "AlbumDataset";
constexpr char kCelebANode[] = "CelebADataset";
constexpr char kCifar100Node[] = "Cifar100Dataset";
constexpr char kCifar10Node[] = "Cifar10Dataset";
constexpr char kCityscapesNode[] = "CityscapesDataset";
constexpr char kCLUENode[] = "CLUEDataset";
constexpr char kCocoNode[] = "CocoDataset";
constexpr char kCSVNode[] = "CSVDataset";

View File

@@ -7,6 +7,7 @@ set(DATASET_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES
celeba_node.cc
cifar100_node.cc
cifar10_node.cc
cityscapes_node.cc
clue_node.cc
coco_node.cc
csv_node.cc

View File

@@ -0,0 +1,146 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/ir/datasetops/source/cityscapes_node.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "minddata/dataset/engine/datasetops/source/cityscapes_op.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
// Constructor for CityscapesNode
CityscapesNode::CityscapesNode(const std::string &dataset_dir, const std::string &usage,
const std::string &quality_mode, const std::string &task, bool decode,
std::shared_ptr<SamplerObj> sampler, std::shared_ptr<DatasetCache> cache)
: MappableSourceNode(std::move(cache)),
dataset_dir_(dataset_dir),
usage_(usage),
quality_mode_(quality_mode),
task_(task),
sampler_(sampler),
decode_(decode) {}
std::shared_ptr<DatasetNode> CityscapesNode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
auto node = std::make_shared<CityscapesNode>(dataset_dir_, usage_, quality_mode_, task_, decode_, sampler, cache_);
return node;
}
void CityscapesNode::Print(std::ostream &out) const {
out << Name() + "(dataset dir:" + dataset_dir_;
out << ", task:" + task_ << ", quality mode:" + quality_mode_ << ", usage:" + usage_;
if (sampler_ != nullptr) {
out << ", sampler";
}
if (cache_ != nullptr) {
out << ", cache";
}
out << ")";
}
Status CityscapesNode::ValidateParams() {
RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
RETURN_IF_NOT_OK(ValidateDatasetDirParam("CityscapesNode", dataset_dir_));
RETURN_IF_NOT_OK(ValidateStringValue("CityscapesNode", task_, {"instance", "semantic", "polygon", "color"}));
RETURN_IF_NOT_OK(ValidateStringValue("CityscapesNode", quality_mode_, {"fine", "coarse"}));
if (quality_mode_ == "fine") {
RETURN_IF_NOT_OK(ValidateStringValue("CityscapesNode", usage_, {"train", "test", "val", "all"}));
} else {
RETURN_IF_NOT_OK(ValidateStringValue("CityscapesNode", usage_, {"train", "train_extra", "val", "all"}));
}
RETURN_IF_NOT_OK(ValidateDatasetSampler("CityscapesNode", sampler_));
return Status::OK();
}
// Function to build CityscapesOp for Cityscapes
Status CityscapesNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
TensorShape scalar = TensorShape::CreateScalar();
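// The "task" column is a serialized-JSON string scalar for "polygon"; a decodable uint8 image otherwise.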
if (task_ == "polygon") {
RETURN_IF_NOT_OK(
schema->AddColumn(ColDescriptor("task", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar)));
} else {
RETURN_IF_NOT_OK(
schema->AddColumn(ColDescriptor("task", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 0, &scalar)));
}
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto cityscapes_op = std::make_shared<CityscapesOp>(num_workers_, dataset_dir_, usage_, quality_mode_, task_, decode_,
connector_que_size_, std::move(schema), std::move(sampler_rt));
cityscapes_op->set_total_repeats(GetTotalRepeats());
cityscapes_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(cityscapes_op);
return Status::OK();
}
// Get the shard id of node
Status CityscapesNode::GetShardId(int32_t *shard_id) {
*shard_id = sampler_->ShardId();
return Status::OK();
}
// Get Dataset size
Status CityscapesNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) {
if (dataset_size_ > 0) {
*dataset_size = dataset_size_;
return Status::OK();
}
int64_t num_rows, sample_size;
RETURN_IF_NOT_OK(CityscapesOp::CountTotalRows(dataset_dir_, usage_, quality_mode_, task_, &num_rows));
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
sample_size = sampler_rt->CalculateNumSamples(num_rows);
if (sample_size == -1) {
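// -1 means the sampler cannot compute the sample count statically; fall back to a dry run.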
RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
}
*dataset_size = sample_size;
dataset_size_ = *dataset_size;
return Status::OK();
}
Status CityscapesNode::to_json(nlohmann::json *out_json) {
nlohmann::json args, sampler_args;
RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args));
args["sampler"] = sampler_args;
args["num_parallel_workers"] = num_workers_;
args["dataset_dir"] = dataset_dir_;
args["usage"] = usage_;
args["quality_mode"] = quality_mode_;
args["task"] = task_;
args["decode"] = decode_;
if (cache_ != nullptr) {
nlohmann::json cache_args;
RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
args["cache"] = cache_args;
}
*out_json = args;
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

View File

@@ -0,0 +1,111 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CITYSCAPES_NODE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CITYSCAPES_NODE_H_
#include <memory>
#include <string>
#include <vector>
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
namespace mindspore {
namespace dataset {
class CityscapesNode : public MappableSourceNode {
public:
/// \brief Constructor.
CityscapesNode(const std::string &dataset_dir, const std::string &usage, const std::string &quality_mode,
const std::string &task, bool decode, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache);
/// \brief Destructor.
~CityscapesNode() = default;
/// \brief Node name getter.
/// \return Name of the current node.
std::string Name() const override { return kCityscapesNode; }
/// \brief Print the description.
/// \param[out] out - The output stream to write output to.
void Print(std::ostream &out) const override;
/// \brief Copy the node to a new object.
/// \return A shared pointer to the new copy.
std::shared_ptr<DatasetNode> Copy() override;
/// \brief A base class override function to create the required runtime dataset op objects for this class.
/// \param[out] node_ops - A vector containing shared pointer to the Dataset Ops that this object will create.
/// \return Status Status::OK() if build successfully.
Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;
/// \brief Parameters validation.
/// \return Status Status::OK() if all the parameters are valid.
Status ValidateParams() override;
/// \brief Get the shard id of node.
/// \return Status Status::OK() if get shard id successfully.
Status GetShardId(int32_t *shard_id) override;
/// \brief Base-class override for GetDatasetSize.
/// \param[in] size_getter Shared pointer to DatasetSizeGetter.
/// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
/// dataset size at the expense of accuracy.
/// \param[out] dataset_size the size of the dataset.
/// \return Status of the function.
Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) override;
/// \brief Getter functions.
const std::string &DatasetDir() const { return dataset_dir_; }
/// \brief Getter functions.
const std::string &Usage() const { return usage_; }
/// \brief Getter functions.
const std::string &QualityMode() const { return quality_mode_; }
/// \brief Getter functions.
const std::string &Task() const { return task_; }
/// \brief Getter functions.
bool Decode() const { return decode_; }
/// \brief Get the arguments of node.
/// \param[out] out_json JSON string of all attributes.
/// \return Status of the function.
Status to_json(nlohmann::json *out_json) override;
/// \brief Sampler getter.
/// \return SamplerObj of the current node.
std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
/// \brief Sampler setter.
void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }
private:
std::string dataset_dir_;
std::string usage_;
std::string quality_mode_;
std::string task_;
bool decode_;
std::shared_ptr<SamplerObj> sampler_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CITYSCAPES_NODE_H_

View File

@@ -926,6 +926,84 @@ inline std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
return std::make_shared<Cifar100Dataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
}
class CityscapesDataset : public Dataset {
public:
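// The three constructors below differ only in how the sampler is supplied:
// shared_ptr, raw pointer, or reference_wrapper.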
explicit CityscapesDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::vector<char> &quality_mode, const std::vector<char> &task, bool decode,
const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
explicit CityscapesDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::vector<char> &quality_mode, const std::vector<char> &task, bool decode,
const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache);
explicit CityscapesDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
const std::vector<char> &quality_mode, const std::vector<char> &task, bool decode,
const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
~CityscapesDataset() = default;
};
/// \brief Function to create a CityscapesDataset.
/// \notes The generated dataset has two columns ["image", "task"].
/// \param[in] dataset_dir The dataset dir to be read.
/// \param[in] usage The type of dataset. Acceptable usages include "train", "test", "val" or "all" if
/// quality_mode is "fine" otherwise "train", "train_extra", "val" or "all".
/// \param[in] quality_mode The quality mode of processed image. Acceptable quality_modes include
/// "fine" or "coarse".
/// \param[in] task The type of task which is used to select output data. Acceptable tasks include
/// "instance", "semantic", "polygon" or "color".
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current CityscapesDataset.
inline std::shared_ptr<CityscapesDataset> Cityscapes(
const std::string &dataset_dir, const std::string &usage, const std::string &quality_mode, const std::string &task,
bool decode = false, const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
const std::shared_ptr<DatasetCache> &cache = nullptr) {
return std::make_shared<CityscapesDataset>(StringToChar(dataset_dir), StringToChar(usage), StringToChar(quality_mode),
StringToChar(task), decode, sampler, cache);
}
/// \brief Function to create a CityscapesDataset.
/// \notes The generated dataset has two columns ["image", "task"].
/// \param[in] dataset_dir The dataset dir to be read.
/// \param[in] usage The type of dataset. Acceptable usages include "train", "test", "val" or "all" if
/// quality_mode is "fine" otherwise "train", "train_extra", "val" or "all".
/// \param[in] quality_mode The quality mode of processed image. Acceptable quality_modes include
/// "fine" or "coarse".
/// \param[in] task The type of task which is used to select output data. Acceptable tasks include
/// "instance", "semantic", "polygon" or "color".
/// \param[in] decode Decode the images after reading.
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current CityscapesDataset.
inline std::shared_ptr<CityscapesDataset> Cityscapes(const std::string &dataset_dir, const std::string &usage,
const std::string &quality_mode, const std::string &task,
bool decode, const Sampler *sampler,
const std::shared_ptr<DatasetCache> &cache = nullptr) {
return std::make_shared<CityscapesDataset>(StringToChar(dataset_dir), StringToChar(usage), StringToChar(quality_mode),
StringToChar(task), decode, sampler, cache);
}
/// \brief Function to create a CityscapesDataset.
/// \notes The generated dataset has two columns ["image", "task"].
/// \param[in] dataset_dir The dataset dir to be read.
/// \param[in] usage The type of dataset. Acceptable usages include "train", "test", "val" or "all" if
/// quality_mode is "fine" otherwise "train", "train_extra", "val" or "all".
/// \param[in] quality_mode The quality mode of processed image. Acceptable quality_modes include
/// "fine" or "coarse".
/// \param[in] task The type of task which is used to select output data. Acceptable tasks include
/// "instance", "semantic", "polygon" or "color".
/// \param[in] decode Decode the images after reading.
/// \param[in] sampler Sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current CityscapesDataset.
inline std::shared_ptr<CityscapesDataset> Cityscapes(const std::string &dataset_dir, const std::string &usage,
const std::string &quality_mode, const std::string &task,
bool decode, const std::reference_wrapper<Sampler> sampler,
const std::shared_ptr<DatasetCache> &cache = nullptr) {
return std::make_shared<CityscapesDataset>(StringToChar(dataset_dir), StringToChar(usage), StringToChar(quality_mode),
StringToChar(task), decode, sampler, cache);
}
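// Usage sketch (illustrative only; the dataset path is a placeholder and the sampler
// mirrors the ones used in the unit tests below):
//   std::shared_ptr<Dataset> ds = Cityscapes("/path/to/cityscapes", "train", "fine", "semantic",
//                                            true, std::make_shared<RandomSampler>(false, 4));
//   std::shared_ptr<Iterator> iter = ds->CreateIterator();
//   std::unordered_map<std::string, mindspore::MSTensor> row;
//   iter->GetNextRow(&row);  // columns: "image" (uint8) and "task" (uint8 for "semantic")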
class CLUEDataset : public Dataset {
public:
explicit CLUEDataset(const std::vector<std::vector<char>> &dataset_files, const std::vector<char> &task,

View File

@@ -34,6 +34,7 @@ class Sampler : std::enable_shared_from_this<Sampler> {
friend class CelebADataset;
friend class Cifar10Dataset;
friend class Cifar100Dataset;
friend class CityscapesDataset;
friend class CLUEDataset;
friend class CocoDataset;
friend class CSVDataset;

View File

@@ -64,7 +64,7 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che
check_add_column, check_textfiledataset, check_concat, check_random_dataset, check_split, \
check_bucket_batch_by_length, check_cluedataset, check_save, check_csvdataset, check_paddeddataset, \
check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send, check_flickr_dataset, \
- check_sb_dataset, check_flowers102dataset
+ check_sb_dataset, check_flowers102dataset, check_cityscapes_dataset
from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
get_prefetch_size
from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist
@@ -6340,3 +6340,173 @@ class DeserializedDataset(Dataset):
json_str = json.dumps(self.input_obj)
return cde.Dataset.from_json_string(json_str)
return cde.Dataset.from_json_file(self.input_obj)
class CityscapesDataset(MappableDataset):
"""
A source dataset for reading and parsing the Cityscapes dataset.
The generated dataset has two columns :py:obj:`[image, task]`.
The tensor of column :py:obj:`image` is of the uint8 type.
The tensor of column :py:obj:`task` is of the uint8 type if `task` is not 'polygon'; otherwise it is
a string tensor containing the serialized JSON annotations.
Args:
dataset_dir (str): Path to the root directory that contains the dataset.
usage (str): Acceptable usages include `train`, `test`, `val` or `all` if quality_mode is `fine`
otherwise `train`, `train_extra`, `val` or `all` (default=`train`).
quality_mode (str): Acceptable quality_modes include `fine` or `coarse` (default=`fine`).
task (str): Acceptable tasks include `instance`, `semantic`, `polygon` or `color` (default=`instance`).
num_samples (int, optional): The number of images to be included in the dataset.
(default=None, all images).
num_parallel_workers (int, optional): Number of workers to read the data
(default=None, number set in the config).
shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
order behavior shown in the table).
decode (bool, optional): Decode the images after reading (default=False).
sampler (Sampler, optional): Object used to choose samples from the
dataset (default=None, expected order behavior shown in the table).
num_shards (int, optional): Number of shards that the dataset will be divided
into (default=None). When this argument is specified, `num_samples` reflects
the maximum number of samples per shard.
shard_id (int, optional): The shard ID within num_shards (default=None). This
argument can only be specified when num_shards is also specified.
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
(default=None, which means no cache is used).
Raises:
RuntimeError: If dataset_dir is invalid or does not contain data files.
RuntimeError: If num_parallel_workers exceeds the max thread numbers.
RuntimeError: If sampler and shuffle are specified at the same time.
RuntimeError: If sampler and sharding are specified at the same time.
RuntimeError: If num_shards is specified but shard_id is None.
RuntimeError: If shard_id is specified but num_shards is None.
ValueError: If dataset_dir does not exist.
ValueError: If task is invalid.
ValueError: If quality_mode is invalid.
ValueError: If usage is invalid.
ValueError: If shard_id is invalid (< 0 or >= num_shards).
Note:
- This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
The table below shows what input arguments are allowed and their expected behavior.
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
:widths: 25 25 50
:header-rows: 1
* - Parameter `sampler`
- Parameter `shuffle`
- Expected Order Behavior
* - None
- None
- random order
* - None
- True
- random order
* - None
- False
- sequential order
* - Sampler object
- None
- order defined by sampler
* - Sampler object
- True
- not allowed
* - Sampler object
- False
- not allowed
Examples:
>>> cityscapes_dataset_dir = "/path/to/cityscapes_dataset_directory"
>>>
>>> # 1) Get all samples from Cityscapes dataset in sequence
>>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, task="instance", quality_mode="fine",
>>> usage="train", shuffle=False, num_parallel_workers=1)
>>>
>>> # 2) Randomly select 350 samples from Cityscapes dataset
>>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, num_samples=350, shuffle=True,
>>> num_parallel_workers=1)
>>>
>>> # 3) Get samples from Cityscapes dataset for shard 0 in a 2-way distributed training
>>> dataset = ds.CityscapesDataset(dataset_dir=cityscapes_dataset_dir, num_shards=2, shard_id=0,
>>> num_parallel_workers=1)
>>>
>>> # In Cityscapes dataset, each dictionary has keys "image" and "task"
About Cityscapes dataset:
The Cityscapes dataset consists of 5000 colour images with high quality dense pixel annotations and
19998 colour images with coarser polygonal annotations in 50 cities. There are 30 classes in this
dataset and the polygonal annotations include dense semantic segmentation and instance segmentation
for vehicles and people.
You can unzip the dataset files into the following directory structure and read them with MindSpore's API.
Taking the quality_mode of `fine` as an example.
.. code-block::
.
Cityscapes
leftImg8bit
| train
| | aachen
| | | aachen_000000_000019_leftImg8bit.png
| | | aachen_000001_000019_leftImg8bit.png
| | | ...
| | bochum
| | | ...
| | ...
| test
| | ...
| val
| | ...
gtFine
train
| aachen
| | aachen_000000_000019_gtFine_color.png
| | aachen_000000_000019_gtFine_instanceIds.png
| | aachen_000000_000019_gtFine_labelIds.png
| | aachen_000000_000019_gtFine_polygons.json
| | aachen_000001_000019_gtFine_color.png
| | aachen_000001_000019_gtFine_instanceIds.png
| | aachen_000001_000019_gtFine_labelIds.png
| | aachen_000001_000019_gtFine_polygons.json
| | ...
| bochum
| | ...
| ...
test
| ...
val
...
Citation:
.. code-block::
@inproceedings{Cordts2016Cityscapes,
title = {The Cityscapes Dataset for Semantic Urban Scene Understanding},
author = {Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler,
Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt},
booktitle = {Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year = {2016}
}
"""
@check_cityscapes_dataset
def __init__(self, dataset_dir, usage="train", quality_mode="fine", task="instance", num_samples=None,
num_parallel_workers=None, shuffle=None, decode=None, sampler=None, num_shards=None,
shard_id=None, cache=None):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
self.dataset_dir = dataset_dir
self.task = task
self.quality_mode = quality_mode
self.usage = usage
self.decode = replace_none(decode, False)
def parse(self, children=None):
return cde.CityscapesNode(self.dataset_dir, self.usage, self.quality_mode, self.task, self.decode, self.sampler)
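A minimal usage sketch for the 'polygon' task (the dataset path is a placeholder, and using str()
to recover the JSON text from the string scalar is an assumption about output_numpy behavior):

import json
import mindspore.dataset as ds

# Sketch only: read serialized polygon annotations back into Python objects.
dataset_dir = "/path/to/cityscapes"  # hypothetical local extract
data = ds.CityscapesDataset(dataset_dir, usage="train", quality_mode="fine",
                            task="polygon", shuffle=False)
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    polygons = json.loads(str(item["task"]))  # "task" column holds serialized JSON
    print(polygons["imgHeight"], polygons["imgWidth"], len(polygons["objects"]))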

View File

@@ -1426,3 +1426,39 @@ def check_sb_dataset(method):
return method(self, *args, **kwargs)
return new_method
def check_cityscapes_dataset(method):
"""A wrapper that wraps a parameter checker around the original CityScapesDataset."""
@wraps(method)
def new_method(self, *args, **kwargs):
_, param_dict = parse_user_args(method, *args, **kwargs)
nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id']
nreq_param_bool = ['shuffle', 'decode']
dataset_dir = param_dict.get('dataset_dir')
check_dir(dataset_dir)
task = param_dict.get('task')
check_valid_str(task, ["instance", "semantic", "polygon", "color"], "task")
quality_mode = param_dict.get('quality_mode')
check_valid_str(quality_mode, ["fine", "coarse"], "quality_mode")
usage = param_dict.get('usage')
if quality_mode == "fine":
valid_strings = ["train", "test", "val", "all"]
else:
valid_strings = ["train", "train_extra", "val", "all"]
check_valid_str(usage, valid_strings, "usage")
validate_dataset_param_value(nreq_param_int, param_dict, int)
validate_dataset_param_value(nreq_param_bool, param_dict, bool)
check_sampler_shuffle_shard_options(param_dict)
return method(self, *args, **kwargs)
return new_method

View File

@@ -17,6 +17,7 @@ SET(DE_UT_SRCS
c_api_cache_test.cc
c_api_dataset_album_test.cc
c_api_dataset_cifar_test.cc
c_api_dataset_cityscapes_test.cc
c_api_dataset_clue_test.cc
c_api_dataset_coco_test.cc
c_api_dataset_config_test.cc

View File

@@ -0,0 +1,334 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <iostream>
#include "common/common.h"
#include "minddata/dataset/include/dataset/datasets.h"
using namespace mindspore::dataset;
using mindspore::dataset::Tensor;
class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};
TEST_F(MindDataTestPipeline, TestCityscapesBasic) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesBasic.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "color"; // instance semantic polygon color
// Create a Cityscapes Dataset
std::shared_ptr<Dataset> ds = Cityscapes(dataset_path, usage, quality_mode, task);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
auto task = row["task"];
MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 5);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCityscapesBasicWithPipeline) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesBasicWithPipeline.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
// Create two Cityscapes Dataset
std::shared_ptr<Dataset> ds1 =
Cityscapes(dataset_path, usage, quality_mode, "color", false, std::make_shared<RandomSampler>(false, 2));
std::shared_ptr<Dataset> ds2 =
Cityscapes(dataset_path, usage, quality_mode, "color", false, std::make_shared<RandomSampler>(false, 3));
EXPECT_NE(ds1, nullptr);
EXPECT_NE(ds2, nullptr);
// Create two Repeat operation on ds
int32_t repeat_num = 3;
ds1 = ds1->Repeat(repeat_num);
EXPECT_NE(ds1, nullptr);
repeat_num = 2;
ds2 = ds2->Repeat(repeat_num);
EXPECT_NE(ds2, nullptr);
// Create two Project operation on ds
std::vector<std::string> column_project = {"image"};
ds1 = ds1->Project(column_project);
EXPECT_NE(ds1, nullptr);
ds2 = ds2->Project(column_project);
EXPECT_NE(ds2, nullptr);
// Create a Concat operation on the ds
ds1 = ds1->Concat({ds2});
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds1->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 12);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCityscapesGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesGetters.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "color"; // instance semantic polygon color
// Create a Cityscapes Dataset
std::shared_ptr<Dataset> ds1 =
Cityscapes(dataset_path, usage, quality_mode, task, false, std::make_shared<RandomSampler>(false, 4));
std::shared_ptr<Dataset> ds2 = Cityscapes(dataset_path, usage, quality_mode, task);
std::vector<std::string> column_names = {"image", "task"};
EXPECT_NE(ds1, nullptr);
EXPECT_EQ(ds1->GetDatasetSize(), 4);
EXPECT_EQ(ds1->GetColumnNames(), column_names);
EXPECT_EQ(ds1->GetBatchSize(), 1);
EXPECT_NE(ds2, nullptr);
EXPECT_EQ(ds2->GetDatasetSize(), 5);
EXPECT_EQ(ds2->GetColumnNames(), column_names);
EXPECT_EQ(ds2->GetBatchSize(), 1);
}
TEST_F(MindDataTestPipeline, TestCityscapesTaskJson) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesTaskJson.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes/testTaskJson";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "polygon"; // instance semantic polygon color
std::shared_ptr<Dataset> ds = Cityscapes(dataset_path, usage, quality_mode, task);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
std::string json_file_path = dataset_path + "/gtFine/train/aa/aa_000000_gtFine_polygons.json";
std::ifstream file_handle(json_file_path);
std::string contents((std::istreambuf_iterator<char>(file_handle)), std::istreambuf_iterator<char>());
nlohmann::json contents_js = nlohmann::json::parse(contents);
std::shared_ptr<Tensor> t_expect_item;
Tensor::CreateScalar(contents_js.dump(), &t_expect_item);
file_handle.close();
mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(t_expect_item));
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
auto task = row["task"];
MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
MS_LOG(INFO) << "Tensor task shape: " << task.Shape();
EXPECT_MSTENSOR_EQ(task, expect_item);
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCityscapesDecode) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesDecode.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "color"; // instance semantic polygon color
// Create a Cityscapes Dataset
std::shared_ptr<Dataset> ds =
Cityscapes(dataset_path, usage, quality_mode, task, true, std::make_shared<RandomSampler>());
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
auto task = row["task"];
EXPECT_EQ(image.Shape().size(), 3);
EXPECT_EQ(task.Shape().size(), 3);
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 5);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCityscapesNumSamplers) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesNumSamplers.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "color"; // instance semantic polygon color
// Create a Cityscapes Dataset
std::shared_ptr<Dataset> ds =
Cityscapes(dataset_path, usage, quality_mode, task, true, std::make_shared<RandomSampler>(false, 5));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
auto task = row["task"];
MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
MS_LOG(INFO) << "Tensor task shape: " << task.Shape();
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 5);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCityscapesError) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesError.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "color"; // instance semantic polygon color
// Create a Cityscapes Dataset with non-existing dataset dir
std::shared_ptr<Dataset> ds0 = Cityscapes("NotExistDir", usage, quality_mode, task);
EXPECT_NE(ds0, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter0 = ds0->CreateIterator();
// Expect failure: invalid Cityscapes input
EXPECT_EQ(iter0, nullptr);
// Create a Cityscapes Dataset with err task
std::shared_ptr<Dataset> ds1 = Cityscapes(dataset_path, usage, quality_mode, "task");
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
// Expect failure: invalid Cityscapes input
EXPECT_EQ(iter1, nullptr);
// Create a Cityscapes Dataset with err quality_mode
std::shared_ptr<Dataset> ds2 = Cityscapes(dataset_path, usage, "quality_mode", task);
EXPECT_NE(ds2, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
// Expect failure: invalid Cityscapes input
EXPECT_EQ(iter2, nullptr);
// Create a Cityscapes Dataset with err usage
std::shared_ptr<Dataset> ds3 = Cityscapes(dataset_path, "usage", quality_mode, task);
EXPECT_NE(ds3, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
// Expect failure: invalid Cityscapes input
EXPECT_EQ(iter3, nullptr);
}
TEST_F(MindDataTestPipeline, TestCityscapesWithNullSamplerError) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCityscapesWithNullSamplerError.";
std::string dataset_path = datasets_root_path_ + "/testCityscapesData/cityscapes";
std::string usage = "train"; // quality_mode=fine 'train', 'test', 'val' else 'train', 'train_extra', 'val'
std::string quality_mode = "fine"; // fine coarse
std::string task = "color"; // instance semantic polygon color
// Create a Cityscapes Dataset
std::shared_ptr<Dataset> ds = Cityscapes(dataset_path, usage, quality_mode, task, false, nullptr);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter = ds->CreateIterator();
// Expect failure: invalid Cityscapes input, sampler cannot be nullptr
EXPECT_EQ(iter, nullptr);
}
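
For reference, the fine-quality, color-task read exercised by the C++ tests above can also be sketched through the Python API. This is a minimal sketch, not part of this change: the "./cityscapes" path is a hypothetical local layout, and ds.RandomSampler mirrors the RandomSampler(false, 5) used above.

import mindspore.dataset as ds

# Sketch: read 5 random samples of the fine/train/color configuration.
sampler = ds.RandomSampler(replacement=False, num_samples=5)
data = ds.CityscapesDataset("./cityscapes", usage="train", quality_mode="fine",
                            task="color", decode=True, sampler=sampler)
for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(row["image"].shape, row["task"].shape)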

Binary files not shown: 14 test images added (17-31 KiB each).

View File

@ -0,0 +1,228 @@
{
"imgHeight": 1024,
"imgWidth": 2048,
"objects": [
{
"label": "sidewalk",
"polygon": [
[
2047,
532
],
[
1911,
537
],
[
1828,
540
],
[
1782,
540
],
[
1794,
552
],
[
2047,
564
]
]
},
{
"label": "car",
"polygon": [
[
806,
503
],
[
803,
524
],
[
796,
528
],
[
785,
528
],
[
778,
527
],
[
776,
520
],
[
771,
520
],
[
768,
515
],
[
732,
515
],
[
728,
523
],
[
722,
523
],
[
715,
524
],
[
714,
523
],
[
711,
520
],
[
709,
517
],
[
706,
517
],
[
702,
520
],
[
697,
529
],
[
690,
532
],
[
681,
532
],
[
678,
529
],
[
674,
527
],
[
671,
517
],
[
635,
514
],
[
632,
525
],
[
615,
526
],
[
611,
521
],
[
609,
483
],
[
620,
464
],
[
629,
459
],
[
625,
453
],
[
629,
450
],
[
634,
450
],
[
639,
455
],
[
644,
456
],
[
644,
453
],
[
643,
450
],
[
658,
429
],
[
672,
422
],
[
674,
420
],
[
766,
420
],
[
780,
430
],
[
789,
447
],
[
793,
453
],
[
800,
456
],
[
804,
470
],
[
807,
480
]
]
}
]
}
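
The gtFine polygon annotation above is exactly what the "task" column carries when task="polygon" (the Python test test_cityscapes_polygon below decodes it with json.loads). As a minimal sketch, assuming the record is saved locally as aa_000000_gtFine_polygons.json like the fixture in this change, the labeled polygons could be drawn as follows:

import json
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

# Sketch: outline each labeled polygon from a gtFine annotation record.
with open("aa_000000_gtFine_polygons.json", "r") as f:
    ann = json.load(f)

fig, ax = plt.subplots()
ax.set_xlim(0, ann["imgWidth"])
ax.set_ylim(ann["imgHeight"], 0)  # image coordinates: y axis points down
for obj in ann["objects"]:
    ax.add_patch(Polygon(obj["polygon"], fill=False))
    x, y = obj["polygon"][0]
    ax.annotate(obj["label"], (x, y))
plt.savefig("cityscapes_polygons.jpg")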

Binary file not shown: 1 test image added (17 KiB).

View File

@ -0,0 +1,280 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import pytest
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

DATASET_DIR = "../data/dataset/testCityscapesData/cityscapes"
DATASET_DIR_TASK_JSON = "../data/dataset/testCityscapesData/cityscapes/testTaskJson"


def test_cityscapes_basic(plot=False):
"""
Validate CityscapesDataset basic read.
"""
task = "color" # instance semantic polygon color
quality_mode = "fine" # fine coarse
usage = "train" # quality_mode=fine 'train', 'test', 'val', 'all' else 'train', 'train_extra', 'val', 'all'
data = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task,
decode=True, shuffle=False)
count = 0
images_list = []
task_list = []
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
images_list.append(item['image'])
task_list.append(item['task'])
count = count + 1
assert count == 5
if plot:
visualize_dataset(images_list, task_list, task)


def visualize_dataset(images, labels, task):
"""
Helper function to visualize the dataset samples.
"""
if task == "polygon":
return
image_num = len(images)
for i in range(image_num):
plt.subplot(121)
plt.imshow(images[i])
plt.title('Original')
plt.subplot(122)
plt.imshow(labels[i])
plt.title(task)
plt.savefig('./cityscapes_{}_{}.jpg'.format(task, str(i)))


def test_cityscapes_polygon():
"""
Validate CityscapesDataset with task of polygon.
"""
usage = "train"
quality_mode = "fine"
task = "polygon"
data = ds.CityscapesDataset(DATASET_DIR_TASK_JSON, usage=usage, quality_mode=quality_mode, task=task)
count = 0
json_file = os.path.join(DATASET_DIR_TASK_JSON, "gtFine/train/aa/aa_000000_gtFine_polygons.json")
with open(json_file, "r") as f:
expected = json.load(f)
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
task_dict = json.loads(str(item['task'], encoding="utf-8"))
assert task_dict == expected
count = count + 1
assert count == 1


def test_cityscapes_basic_func():
"""
Validate CityscapesDataset with repeat, batch and getter operation.
"""
# case 1: test num_samples
usage = "train"
quality_mode = "fine"
task = "color"
data1 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_samples=4)
num_iter1 = 0
for _ in data1.create_dict_iterator(num_epochs=1):
num_iter1 += 1
assert num_iter1 == 4
# case 2: test repeat
data2 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_samples=5)
data2 = data2.repeat(5)
num_iter2 = 0
for _ in data2.create_dict_iterator(num_epochs=1):
num_iter2 += 1
assert num_iter2 == 25
# case 3: test batch with drop_remainder=False
data3 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, decode=True)
resize_op = c_vision.Resize((100, 100))
data3 = data3.map(operations=resize_op, input_columns=["image"], num_parallel_workers=1)
data3 = data3.map(operations=resize_op, input_columns=["task"], num_parallel_workers=1)
assert data3.get_dataset_size() == 5
assert data3.get_batch_size() == 1
data3 = data3.batch(batch_size=3) # drop_remainder is default to be False
assert data3.get_dataset_size() == 2
assert data3.get_batch_size() == 3
num_iter3 = 0
for _ in data3.create_dict_iterator(num_epochs=1):
num_iter3 += 1
assert num_iter3 == 2
# case 4: test batch with drop_remainder=True
data4 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, decode=True)
resize_op = c_vision.Resize((100, 100))
data4 = data4.map(operations=resize_op, input_columns=["image"], num_parallel_workers=1)
data4 = data4.map(operations=resize_op, input_columns=["task"], num_parallel_workers=1)
assert data4.get_dataset_size() == 5
assert data4.get_batch_size() == 1
data4 = data4.batch(batch_size=3, drop_remainder=True) # the rest of incomplete batch will be dropped
assert data4.get_dataset_size() == 1
assert data4.get_batch_size() == 3
num_iter4 = 0
for _ in data4.create_dict_iterator(num_epochs=1):
num_iter4 += 1
assert num_iter4 == 1
# case 5: test get_col_names
data5 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, decode=True)
assert data5.get_col_names() == ["image", "task"]


def test_cityscapes_sequential_sampler():
"""
Test CityscapesDataset with SequentialSampler.
"""
task = "color"
quality_mode = "fine"
usage = "train"
num_samples = 5
sampler = ds.SequentialSampler(num_samples=num_samples)
data1 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, sampler=sampler)
data2 = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task,
shuffle=False, num_samples=num_samples)
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_array_equal(item1["task"], item2["task"])
num_iter += 1
assert num_iter == num_samples


def test_cityscapes_exception():
"""
Validate CityscapesDataset with error parameters.
"""
task = "color"
quality_mode = "fine"
usage = "train"
error_msg_1 = "does not exist or is not a directory or permission denied!"
with pytest.raises(ValueError, match=error_msg_1):
ds.CityscapesDataset("NoExistsDir", usage=usage, quality_mode=quality_mode, task=task)
error_msg_2 = "sampler and shuffle cannot be specified at the same time"
with pytest.raises(RuntimeError, match=error_msg_2):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, shuffle=False,
sampler=ds.PKSampler(3))
error_msg_3 = "sampler and sharding cannot be specified at the same time"
with pytest.raises(RuntimeError, match=error_msg_3):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_shards=2,
shard_id=0, sampler=ds.PKSampler(3))
error_msg_4 = "num_shards is specified and currently requires shard_id as well"
with pytest.raises(RuntimeError, match=error_msg_4):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_shards=10)
error_msg_5 = "shard_id is specified but num_shards is not"
with pytest.raises(RuntimeError, match=error_msg_5):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, shard_id=0)
error_msg_6 = "Input shard_id is not within the required interval"
with pytest.raises(ValueError, match=error_msg_6):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_shards=5, shard_id=-1)
with pytest.raises(ValueError, match=error_msg_6):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_shards=5, shard_id=5)
with pytest.raises(ValueError, match=error_msg_6):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_shards=2, shard_id=5)
error_msg_7 = "num_parallel_workers exceeds"
with pytest.raises(ValueError, match=error_msg_7):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, shuffle=False,
num_parallel_workers=0)
with pytest.raises(ValueError, match=error_msg_7):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, shuffle=False,
num_parallel_workers=256)
with pytest.raises(ValueError, match=error_msg_7):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, shuffle=False,
num_parallel_workers=-2)
error_msg_8 = "Argument shard_id"
with pytest.raises(TypeError, match=error_msg_8):
ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task, num_shards=2, shard_id="0")
def exception_func(item):
raise Exception("Error occur!")
try:
data = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task)
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
num_rows = 0
for _ in data.create_dict_iterator():
num_rows += 1
assert False
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files:" in str(e)
    try:
        data = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task)
        data = data.map(operations=exception_func, input_columns=["task"], num_parallel_workers=1)
num_rows = 0
for _ in data.create_dict_iterator():
num_rows += 1
assert False
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files:" in str(e)


def test_cityscapes_param():
"""
Validate CityscapesDataset with basic parameters like usage, quality_mode and task.
"""
def test_config(usage="train", quality_mode="fine", task="color"):
try:
data = ds.CityscapesDataset(DATASET_DIR, usage=usage, quality_mode=quality_mode, task=task)
num_rows = 0
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
num_rows += 1
except (ValueError, TypeError, RuntimeError) as e:
return str(e)
return num_rows
assert test_config(usage="train") == 5
assert test_config(usage="test") == 1
assert test_config(usage="val") == 1
assert test_config(usage="all") == 7
assert "usage is not within the valid set of ['train', 'test', 'val', 'all']" \
in test_config("invalid", "fine", "instance")
assert "Argument usage with value ['list'] is not of type [<class 'str'>]" \
in test_config(["list"], "fine", "instance")
assert "quality_mode is not within the valid set of ['fine', 'coarse']" \
in test_config("train", "invalid", "instance")
assert "Argument quality_mode with value ['list'] is not of type [<class 'str'>]" \
in test_config("train", ["list"], "instance")
assert "task is not within the valid set of ['instance', 'semantic', 'polygon', 'color']." \
in test_config("train", "fine", "invalid")
assert "Argument task with value ['list'] is not of type [<class 'str'>], but got <class 'list'>." \
in test_config("train", "fine", ["list"])


if __name__ == "__main__":
test_cityscapes_basic()
test_cityscapes_polygon()
test_cityscapes_basic_func()
test_cityscapes_sequential_sampler()
test_cityscapes_exception()
test_cityscapes_param()
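
Taken together, the cases above suggest a typical training-style input pipeline on top of the new operator. A minimal sketch, using only calls exercised by these tests; the "./cityscapes" path and the 512x1024 resize target are hypothetical:

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

# Sketch: decode, resize both columns, then batch, mirroring cases 3/4 above.
data = ds.CityscapesDataset("./cityscapes", usage="train", quality_mode="fine",
                            task="semantic", decode=True, shuffle=True)
resize_op = c_vision.Resize((512, 1024))
data = data.map(operations=resize_op, input_columns=["image"])
data = data.map(operations=resize_op, input_columns=["task"])
data = data.batch(batch_size=2, drop_remainder=True)
for batch in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    images, targets = batch["image"], batch["task"]  # batched numpy arrays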