!3944 Minddata new dataset api ==> CelebA
Merge pull request !3944 from xiefangqi/md_new_api_CelebA
This commit is contained in:
commit
a395907182
|
@ -21,6 +21,7 @@
|
|||
#include "minddata/dataset/include/transforms.h"
|
||||
#include "minddata/dataset/engine/dataset_iterator.h"
|
||||
// Source dataset headers (in alphabetical order)
|
||||
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
|
||||
|
@ -91,6 +92,16 @@ Dataset::Dataset() {
|
|||
// FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS
|
||||
// (In alphabetical order)
|
||||
|
||||
// Function to create a CelebADataset.
|
||||
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type,
|
||||
const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
|
||||
const std::set<std::string> &extensions) {
|
||||
auto ds = std::make_shared<CelebADataset>(dataset_dir, dataset_type, sampler, decode, extensions);
|
||||
|
||||
// Call derived class validation method.
|
||||
return ds->ValidateParams() ? ds : nullptr;
|
||||
}
|
||||
|
||||
// Function to create a Cifar10Dataset.
|
||||
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) {
|
||||
auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, sampler);
|
||||
|
@ -109,7 +120,8 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, std::s
|
|||
|
||||
// Function to create a CocoDataset.
|
||||
std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
|
||||
const std::string &task, bool decode, std::shared_ptr<SamplerObj> sampler) {
|
||||
const std::string &task, const bool &decode,
|
||||
const std::shared_ptr<SamplerObj> &sampler) {
|
||||
auto ds = std::make_shared<CocoDataset>(dataset_dir, annotation_file, task, decode, sampler);
|
||||
|
||||
// Call derived class validation method.
|
||||
|
@ -334,6 +346,53 @@ bool ValidateCommonDatasetParams(std::string dataset_dir) {
|
|||
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
|
||||
// (In alphabetical order)
|
||||
|
||||
// Constructor for CelebADataset
|
||||
CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
|
||||
const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
|
||||
const std::set<std::string> &extensions)
|
||||
: dataset_dir_(dataset_dir),
|
||||
dataset_type_(dataset_type),
|
||||
sampler_(sampler),
|
||||
decode_(decode),
|
||||
extensions_(extensions) {}
|
||||
|
||||
bool CelebADataset::ValidateParams() {
|
||||
Path dir(dataset_dir_);
|
||||
if (!dir.IsDirectory()) {
|
||||
MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified.";
|
||||
return false;
|
||||
}
|
||||
std::set<std::string> dataset_type_list = {"all", "train", "valid", "test"};
|
||||
auto iter = dataset_type_list.find(dataset_type_);
|
||||
if (iter == dataset_type_list.end()) {
|
||||
MS_LOG(ERROR) << "dataset_type should be one of 'all', 'train', 'valid' or 'test'.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Function to build CelebADataset
|
||||
std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
RETURN_EMPTY_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
// label is like this:0 1 0 0 1......
|
||||
RETURN_EMPTY_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
|
||||
node_ops.push_back(std::make_shared<CelebAOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
||||
decode_, dataset_type_, extensions_, std::move(schema),
|
||||
std::move(sampler_->Build())));
|
||||
return node_ops;
|
||||
}
|
||||
|
||||
// Constructor for Cifar10Dataset
|
||||
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
|
||||
: dataset_dir_(dataset_dir), sampler_(sampler) {}
|
||||
|
@ -396,7 +455,7 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() {
|
|||
|
||||
// Constructor for CocoDataset
|
||||
CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
|
||||
bool decode, std::shared_ptr<SamplerObj> sampler)
|
||||
const bool &decode, const std::shared_ptr<SamplerObj> &sampler)
|
||||
: dataset_dir_(dataset_dir), annotation_file_(annotation_file), task_(task), decode_(decode), sampler_(sampler) {}
|
||||
|
||||
bool CocoDataset::ValidateParams() {
|
||||
|
|
|
@ -41,6 +41,7 @@ namespace api {
|
|||
class TensorOperation;
|
||||
class SamplerObj;
|
||||
// Datasets classes (in alphabetical order)
|
||||
class CelebADataset;
|
||||
class Cifar10Dataset;
|
||||
class Cifar100Dataset;
|
||||
class CocoDataset;
|
||||
|
@ -59,6 +60,20 @@ class SkipDataset;
|
|||
class TakeDataset;
|
||||
class ZipDataset;
|
||||
|
||||
/// \brief Function to create a CelebADataset
|
||||
/// \notes The generated dataset has two columns ['image', 'attr'].
|
||||
///     The type of the image tensor is uint8. The attr tensor is uint32 and of one-hot type.
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
|
||||
/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
|
||||
/// \param[in] decode Decode the images after reading (default=false).
|
||||
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all",
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr, const bool &decode = false,
|
||||
const std::set<std::string> &extensions = {});
|
||||
|
||||
/// \brief Function to create a Cifar10 Dataset
|
||||
/// \notes The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
|
@ -93,8 +108,8 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
|
|||
/// will be used to randomly iterate the entire dataset
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
|
||||
const std::string &task = "Detection", bool decode = false,
|
||||
std::shared_ptr<SamplerObj> sampler = nullptr);
|
||||
const std::string &task = "Detection", const bool &decode = false,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr);
|
||||
|
||||
/// \brief Function to create an ImageFolderDataset
|
||||
/// \notes A source dataset that reads images from a tree of directories
|
||||
|
@ -277,6 +292,32 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
|
||||
/* ####################################### Derived Dataset classes ################################# */
|
||||
|
||||
class CelebADataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
|
||||
const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
|
||||
const std::set<std::string> &extensions);
|
||||
|
||||
/// \brief Destructor
|
||||
~CelebADataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::vector<std::shared_ptr<DatasetOp>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::string dataset_dir_;
|
||||
std::string dataset_type_;
|
||||
bool decode_;
|
||||
std::set<std::string> extensions_;
|
||||
std::shared_ptr<SamplerObj> sampler_;
|
||||
};
|
||||
|
||||
class Cifar10Dataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
|
@ -322,8 +363,8 @@ class Cifar100Dataset : public Dataset {
|
|||
class CocoDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, bool decode,
|
||||
std::shared_ptr<SamplerObj> sampler);
|
||||
CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
|
||||
const bool &decode, const std::shared_ptr<SamplerObj> &sampler);
|
||||
|
||||
/// \brief Destructor
|
||||
~CocoDataset() = default;
|
||||
|
|
|
@ -1639,7 +1639,7 @@ TEST_F(MindDataTestPipeline, TestCocoPanoptic) {
|
|||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, TestCocoDefault) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection.";
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDefault.";
|
||||
// Create a Coco Dataset
|
||||
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
|
||||
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";
|
||||
|
@ -1675,7 +1675,7 @@ TEST_F(MindDataTestPipeline, TestCocoDefault) {
|
|||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, TestCocoException) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection.";
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoException.";
|
||||
// Create a Coco Dataset
|
||||
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
|
||||
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";
|
||||
|
@ -1841,3 +1841,97 @@ TEST_F(MindDataTestPipeline, TestConcatFail2) {
|
|||
ds = ds->Concat({});
|
||||
EXPECT_EQ(ds, nullptr);
|
||||
}
|
||||
|
||||
// Reads two samples of testCelebAData through a SequentialSampler and compares the image and
// attr tensors of every row against the expected files and attribute vectors.
TEST_F(MindDataTestPipeline, TestCelebADataset) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADataset.";

  // Create a CelebA Dataset
  std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", SequentialSampler(0, 2), false, {});
  // ASSERT rather than EXPECT: ds is dereferenced on the next statement, so a null result has to
  // stop the test instead of crashing it.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Same reasoning: iter is dereferenced right below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // Check if CelebAOp read correct images/attr
  std::string expect_file[] = {"1.JPEG", "2.jpg"};
  std::vector<std::vector<uint32_t>> expect_attr_vector =
    {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
      1, 0, 0, 1}, {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
      1, 0, 0, 0, 0, 0, 0, 0, 1}};
  uint64_t i = 0;
  while (row.size() != 0) {
    // The expectation tables have two entries each; an unexpected extra row must be reported as a
    // failure instead of indexing past their end. Break (not return) so the pipeline is still stopped.
    if (i >= expect_attr_vector.size()) {
      ADD_FAILURE() << "CelebA dataset produced more rows than expected.";
      break;
    }

    auto image = row["image"];
    auto attr = row["attr"];

    std::shared_ptr<Tensor> expect_image;
    Tensor::CreateFromFile(folder_path + expect_file[i], &expect_image);
    EXPECT_EQ(*image, *expect_image);

    std::shared_ptr<Tensor> expect_attr;
    Tensor::CreateFromVector(expect_attr_vector[i], TensorShape({40}), &expect_attr);
    EXPECT_EQ(*attr, *expect_attr);

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Creates a CelebA dataset with every optional argument left at its default, iterates it, logs
// the tensor shapes of each row and checks that exactly two rows are produced.
TEST_F(MindDataTestPipeline, TestCelebADefault) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADefault.";

  // Create a CelebA Dataset
  std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  std::shared_ptr<Dataset> ds = CelebA(folder_path);
  // ASSERT rather than EXPECT: ds is dereferenced on the next statement, so a null result has to
  // stop the test instead of crashing it.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Same reasoning: iter is dereferenced right below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  // Check if CelebAOp read correct images/attr
  uint64_t i = 0;
  while (row.size() != 0) {
    auto image = row["image"];
    auto attr = row["attr"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    MS_LOG(INFO) << "Tensor attr shape: " << attr->shape();

    iter->GetNextRow(&row);
    i++;
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Checks that CelebA() returns nullptr for an invalid dataset directory and for an invalid
// dataset_type.
TEST_F(MindDataTestPipeline, TestCelebAException) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebAException.";

  // Inputs: one usable directory, one invalid directory, one invalid split name.
  const std::string valid_dir = datasets_root_path_ + "/testCelebAData/";
  const std::string bad_dir = "./testNotExist";
  const std::string bad_type = "invalid_type";

  // Invalid directory, everything else defaulted.
  std::shared_ptr<Dataset> ds_bad_dir = CelebA(bad_dir);
  EXPECT_EQ(ds_bad_dir, nullptr);

  // Usable directory combined with an invalid dataset_type.
  std::shared_ptr<Dataset> ds_bad_type = CelebA(valid_dir, bad_type);
  EXPECT_EQ(ds_bad_type, nullptr);
}
|
Loading…
Reference in New Issue