!3610 Add C++ API support for Concat and Zip

Merge pull request !3610 from TinaMengtingZhang/cpp-api-concat
This commit is contained in:
mindspore-ci-bot 2020-08-01 02:02:09 +08:00 committed by Gitee
commit b5f317c29f
3 changed files with 384 additions and 15 deletions

View File

@ -27,6 +27,7 @@
#include "minddata/dataset/engine/datasetops/source/voc_op.h" #include "minddata/dataset/engine/datasetops/source/voc_op.h"
// Dataset operator headers (in alphabetical order) // Dataset operator headers (in alphabetical order)
#include "minddata/dataset/engine/datasetops/batch_op.h" #include "minddata/dataset/engine/datasetops/batch_op.h"
#include "minddata/dataset/engine/datasetops/concat_op.h"
#include "minddata/dataset/engine/datasetops/map_op/map_op.h" #include "minddata/dataset/engine/datasetops/map_op/map_op.h"
#include "minddata/dataset/engine/datasetops/project_op.h" #include "minddata/dataset/engine/datasetops/project_op.h"
#include "minddata/dataset/engine/datasetops/rename_op.h" #include "minddata/dataset/engine/datasetops/rename_op.h"
@ -127,6 +128,14 @@ std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<Sam
return ds->ValidateParams() ? ds : nullptr; return ds->ValidateParams() ? ds : nullptr;
} }
// Overload the "+" operator so two datasets can be concatenated directly.
// Returns nullptr when parameter validation of the resulting node fails.
std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1,
                                         const std::shared_ptr<Dataset> &datasets2) {
  std::vector<std::shared_ptr<Dataset>> inputs = {datasets1, datasets2};
  auto ds = std::make_shared<ConcatDataset>(inputs);

  // Run the derived class validation before handing the node back.
  return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a VOCDataset. // Function to create a VOCDataset.
std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task, const std::string &mode, std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task, const std::string &mode,
const std::map<std::string, int32_t> &class_index, bool decode, const std::map<std::string, int32_t> &class_index, bool decode,
@ -137,6 +146,14 @@ std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::strin
return ds->ValidateParams() ? ds : nullptr; return ds->ValidateParams() ? ds : nullptr;
} }
// Function to create a ZipDataset.
std::shared_ptr<ZipDataset> Zip(const std::vector<std::shared_ptr<Dataset>> &datasets) {
auto ds = std::make_shared<ZipDataset>(datasets);
// Call derived class validation method.
return ds->ValidateParams() ? ds : nullptr;
}
// FUNCTIONS TO CREATE DATASETS FOR DATASET OPS // FUNCTIONS TO CREATE DATASETS FOR DATASET OPS
// (In alphabetical order) // (In alphabetical order)
@ -157,6 +174,14 @@ std::shared_ptr<BatchDataset> Dataset::Batch(int32_t batch_size, bool drop_remai
return ds; return ds;
} }
// Function to create a Concat dataset
// Builds a ConcatDataset over `datasets` plus the current dataset; returns
// nullptr if validation fails (e.g. `datasets` is empty).
std::shared_ptr<ConcatDataset> Dataset::Concat(const std::vector<std::shared_ptr<Dataset>> &datasets) {
  auto ds = std::make_shared<ConcatDataset>(datasets);
  // The ConcatDataset constructor already adopted `datasets` as children;
  // the current dataset is appended as one more child here.
  // NOTE(review): this places the current dataset *after* the inputs in the
  // children list — confirm the runtime concat order matches the intended
  // "this dataset first, then datasets" semantics.
  ds->children.push_back(shared_from_this());
  return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a Map dataset. // Function to create a Map dataset.
std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations, std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations,
std::vector<std::string> input_columns, std::vector<std::string> input_columns,
@ -269,16 +294,10 @@ std::shared_ptr<Dataset> Dataset::Take(int32_t count) {
// Function to create a Zip dataset // Function to create a Zip dataset
std::shared_ptr<ZipDataset> Dataset::Zip(const std::vector<std::shared_ptr<Dataset>> &datasets) { std::shared_ptr<ZipDataset> Dataset::Zip(const std::vector<std::shared_ptr<Dataset>> &datasets) {
// Default values // Default values
auto ds = std::make_shared<ZipDataset>(); auto ds = std::make_shared<ZipDataset>(datasets);
ds->children.push_back(shared_from_this());
if (!ds->ValidateParams()) { return ds->ValidateParams() ? ds : nullptr;
return nullptr;
}
for (auto dataset : datasets) {
ds->children.push_back(dataset);
}
return ds;
} }
// OTHER FUNCTIONS // OTHER FUNCTIONS
@ -526,6 +545,27 @@ bool BatchDataset::ValidateParams() {
return true; return true;
} }
// Constructor for ConcatDataset: keeps the input list and adopts every
// input dataset as a child of this node.
ConcatDataset::ConcatDataset(const std::vector<std::shared_ptr<Dataset>> &datasets) : datasets_(datasets) {
  // Replace the children list with the inputs (equivalent to assignment).
  children.assign(datasets_.begin(), datasets_.end());
}
// Parameter validation: a concat node requires at least one input dataset.
bool ConcatDataset::ValidateParams() {
  const bool has_inputs = !datasets_.empty();
  if (!has_inputs) {
    MS_LOG(ERROR) << "Concat: concatenated datasets are not specified.";
  }
  return has_inputs;
}
// Build the runtime op(s) for this node; Concat maps to a single ConcatOp.
std::vector<std::shared_ptr<DatasetOp>> ConcatDataset::Build() {
  std::vector<std::shared_ptr<DatasetOp>> node_ops{std::make_shared<ConcatOp>(connector_que_size_)};
  return node_ops;
}
MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns, MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns,
std::vector<std::string> output_columns, const std::vector<std::string> &project_columns) std::vector<std::string> output_columns, const std::vector<std::string> &project_columns)
: operations_(operations), : operations_(operations),
@ -698,9 +738,19 @@ bool TakeDataset::ValidateParams() {
} }
// Function to build ZipOp // Function to build ZipOp
ZipDataset::ZipDataset() {} ZipDataset::ZipDataset(const std::vector<std::shared_ptr<Dataset>> &datasets) : datasets_(datasets) {
for (auto dataset : datasets_) {
this->children.push_back(dataset);
}
}
bool ZipDataset::ValidateParams() { return true; } bool ZipDataset::ValidateParams() {
if (datasets_.empty()) {
MS_LOG(ERROR) << "Zip: dataset to zip are not specified.";
return false;
}
return true;
}
std::vector<std::shared_ptr<DatasetOp>> ZipDataset::Build() { std::vector<std::shared_ptr<DatasetOp>> ZipDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create // A vector containing shared pointer to the Dataset Ops that this object will create

View File

@ -48,6 +48,7 @@ class MnistDataset;
class VOCDataset; class VOCDataset;
// Dataset Op classes (in alphabetical order) // Dataset Op classes (in alphabetical order)
class BatchDataset; class BatchDataset;
class ConcatDataset;
class MapDataset; class MapDataset;
class ProjectDataset; class ProjectDataset;
class RenameDataset; class RenameDataset;
@ -98,6 +99,14 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool de
/// \return Shared pointer to the current MnistDataset /// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr); std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr);
/// \brief Function to create a ConcatDataset
/// \notes Overload the "+" operator to concatenate two datasets
/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated
/// \param[in] datasets2 Shared pointer to the second dataset to be concatenated
/// \return Shared pointer to the current ConcatDataset
std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1,
const std::shared_ptr<Dataset> &datasets2);
/// \brief Function to create a VOCDataset /// \brief Function to create a VOCDataset
/// \notes The generated dataset has multi-columns : /// \notes The generated dataset has multi-columns :
/// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32],
@ -116,6 +125,12 @@ std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::strin
const std::map<std::string, int32_t> &class_index = {}, bool decode = false, const std::map<std::string, int32_t> &class_index = {}, bool decode = false,
std::shared_ptr<SamplerObj> sampler = nullptr); std::shared_ptr<SamplerObj> sampler = nullptr);
/// \brief Function to create a ZipDataset
/// \notes Applies zip to the dataset
/// \param[in] datasets List of shared pointers to the datasets that we want to zip
/// \return Shared pointer to the current Dataset
std::shared_ptr<ZipDataset> Zip(const std::vector<std::shared_ptr<Dataset>> &datasets);
/// \class Dataset datasets.h /// \class Dataset datasets.h
/// \brief A base class to represent a dataset in the data pipeline. /// \brief A base class to represent a dataset in the data pipeline.
class Dataset : public std::enable_shared_from_this<Dataset> { class Dataset : public std::enable_shared_from_this<Dataset> {
@ -158,6 +173,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \return Shared pointer to the current BatchDataset /// \return Shared pointer to the current BatchDataset
std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false); std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
/// \brief Function to create a ConcatDataset
/// \notes Concat the datasets in the input
/// \param[in] datasets List of shared pointers to the dataset that should be concatenated together
/// \return Shared pointer to the current ConcatDataset
std::shared_ptr<ConcatDataset> Concat(const std::vector<std::shared_ptr<Dataset>> &datasets);
/// \brief Function to create a MapDataset /// \brief Function to create a MapDataset
/// \notes Applies each operation in operations to this dataset /// \notes Applies each operation in operations to this dataset
/// \param[in] operations Vector of operations to be applied on the dataset. Operations are /// \param[in] operations Vector of operations to be applied on the dataset. Operations are
@ -220,7 +241,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \brief Function to create a Zip Dataset /// \brief Function to create a Zip Dataset
/// \notes Applies zip to the dataset /// \notes Applies zip to the dataset
/// \param[in] datasets A list of shared pointer to the datasets that we want to zip /// \param[in] datasets A list of shared pointers to the datasets that we want to zip
/// \return Shared pointer to the current Dataset /// \return Shared pointer to the current Dataset
std::shared_ptr<ZipDataset> Zip(const std::vector<std::shared_ptr<Dataset>> &datasets); std::shared_ptr<ZipDataset> Zip(const std::vector<std::shared_ptr<Dataset>> &datasets);
@ -377,6 +398,26 @@ class BatchDataset : public Dataset {
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_; std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;
}; };
class ConcatDataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] datasets List of shared pointers to the datasets to be concatenated
explicit ConcatDataset(const std::vector<std::shared_ptr<Dataset>> &datasets);
/// \brief Destructor
~ConcatDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return The list of shared pointers to the newly created DatasetOps
std::vector<std::shared_ptr<DatasetOp>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
// Input datasets to concatenate; also mirrored into the node's children.
std::vector<std::shared_ptr<Dataset>> datasets_;
};
class MapDataset : public Dataset { class MapDataset : public Dataset {
public: public:
/// \brief Constructor /// \brief Constructor
@ -521,7 +562,7 @@ class TakeDataset : public Dataset {
class ZipDataset : public Dataset { class ZipDataset : public Dataset {
public: public:
/// \brief Constructor /// \brief Constructor
ZipDataset(); explicit ZipDataset(const std::vector<std::shared_ptr<Dataset>> &datasets);
/// \brief Destructor /// \brief Destructor
~ZipDataset() = default; ~ZipDataset() = default;
@ -533,6 +574,9 @@ class ZipDataset : public Dataset {
/// \brief Parameters validation /// \brief Parameters validation
/// \return bool true if all the params are valid /// \return bool true if all the params are valid
bool ValidateParams() override; bool ValidateParams() override;
private:
std::vector<std::shared_ptr<Dataset>> datasets_;
}; };
} // namespace api } // namespace api

View File

@ -53,6 +53,8 @@ class MindDataTestPipeline : public UT::DatasetOpTesting {
TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { TEST_F(MindDataTestPipeline, TestBatchAndRepeat) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatchAndRepeat.";
// Create a Mnist Dataset // Create a Mnist Dataset
std::string folder_path = datasets_root_path_ + "/testMnistData/"; std::string folder_path = datasets_root_path_ + "/testMnistData/";
std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 10));
@ -92,12 +94,16 @@ TEST_F(MindDataTestPipeline, TestBatchAndRepeat) {
} }
TEST_F(MindDataTestPipeline, TestMnistFail1) { TEST_F(MindDataTestPipeline, TestMnistFail1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFail1.";
// Create a Mnist Dataset // Create a Mnist Dataset
std::shared_ptr<Dataset> ds = Mnist("", RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = Mnist("", RandomSampler(false, 10));
EXPECT_EQ(ds, nullptr); EXPECT_EQ(ds, nullptr);
} }
TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTensorOpsAndMap.";
// Create a Mnist Dataset // Create a Mnist Dataset
std::string folder_path = datasets_root_path_ + "/testMnistData/"; std::string folder_path = datasets_root_path_ + "/testMnistData/";
std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20)); std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20));
@ -148,6 +154,8 @@ TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) {
} }
TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { TEST_F(MindDataTestPipeline, TestUniformAugWithOps) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestUniformAugWithOps.";
// Create a Mnist Dataset // Create a Mnist Dataset
std::string folder_path = datasets_root_path_ + "/testMnistData/"; std::string folder_path = datasets_root_path_ + "/testMnistData/";
std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20)); std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20));
@ -199,6 +207,8 @@ TEST_F(MindDataTestPipeline, TestUniformAugWithOps) {
} }
TEST_F(MindDataTestPipeline, TestRandomFlip) { TEST_F(MindDataTestPipeline, TestRandomFlip) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomFlip.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -249,6 +259,8 @@ TEST_F(MindDataTestPipeline, TestRandomFlip) {
} }
TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -288,12 +300,16 @@ TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
} }
TEST_F(MindDataTestPipeline, TestImageFolderFail1) { TEST_F(MindDataTestPipeline, TestImageFolderFail1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFail1.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::shared_ptr<Dataset> ds = ImageFolder("", true, nullptr); std::shared_ptr<Dataset> ds = ImageFolder("", true, nullptr);
EXPECT_EQ(ds, nullptr); EXPECT_EQ(ds, nullptr);
} }
TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithSamplers.";
std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1); std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1);
EXPECT_NE(sampl, nullptr); EXPECT_NE(sampl, nullptr);
@ -353,6 +369,8 @@ TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
} }
TEST_F(MindDataTestPipeline, TestPad) { TEST_F(MindDataTestPipeline, TestPad) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPad.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -406,6 +424,8 @@ TEST_F(MindDataTestPipeline, TestPad) {
} }
TEST_F(MindDataTestPipeline, TestCutOut) { TEST_F(MindDataTestPipeline, TestCutOut) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCutOut.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -456,6 +476,8 @@ TEST_F(MindDataTestPipeline, TestCutOut) {
} }
TEST_F(MindDataTestPipeline, TestNormalize) { TEST_F(MindDataTestPipeline, TestNormalize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNormalize.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -503,6 +525,8 @@ TEST_F(MindDataTestPipeline, TestNormalize) {
} }
TEST_F(MindDataTestPipeline, TestDecode) { TEST_F(MindDataTestPipeline, TestDecode) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDecode.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(false, 10));
@ -549,6 +573,8 @@ TEST_F(MindDataTestPipeline, TestDecode) {
} }
TEST_F(MindDataTestPipeline, TestShuffleDataset) { TEST_F(MindDataTestPipeline, TestShuffleDataset) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestShuffleDataset.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -735,6 +761,8 @@ TEST_F(MindDataTestPipeline, TestTakeDatasetError1) {
} }
TEST_F(MindDataTestPipeline, TestCifar10Dataset) { TEST_F(MindDataTestPipeline, TestCifar10Dataset) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar10Dataset.";
// Create a Cifar10 Dataset // Create a Cifar10 Dataset
std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
std::shared_ptr<Dataset> ds = Cifar10(folder_path, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = Cifar10(folder_path, RandomSampler(false, 10));
@ -767,6 +795,7 @@ TEST_F(MindDataTestPipeline, TestCifar10Dataset) {
} }
TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) { TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar10DatasetFail1.";
// Create a Cifar10 Dataset // Create a Cifar10 Dataset
std::shared_ptr<Dataset> ds = Cifar10("", RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = Cifar10("", RandomSampler(false, 10));
@ -774,6 +803,7 @@ TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) {
} }
TEST_F(MindDataTestPipeline, TestCifar100Dataset) { TEST_F(MindDataTestPipeline, TestCifar100Dataset) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100Dataset.";
// Create a Cifar100 Dataset // Create a Cifar100 Dataset
std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
@ -808,6 +838,7 @@ TEST_F(MindDataTestPipeline, TestCifar100Dataset) {
} }
TEST_F(MindDataTestPipeline, TestCifar100DatasetFail1) { TEST_F(MindDataTestPipeline, TestCifar100DatasetFail1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100DatasetFail1.";
// Create a Cifar100 Dataset // Create a Cifar100 Dataset
std::shared_ptr<Dataset> ds = Cifar100("", RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = Cifar100("", RandomSampler(false, 10));
@ -815,6 +846,8 @@ TEST_F(MindDataTestPipeline, TestCifar100DatasetFail1) {
} }
TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { TEST_F(MindDataTestPipeline, TestRandomColorAdjust) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomColorAdjust.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -873,6 +906,8 @@ TEST_F(MindDataTestPipeline, TestRandomColorAdjust) {
} }
TEST_F(MindDataTestPipeline, TestRandomRotation) { TEST_F(MindDataTestPipeline, TestRandomRotation) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomRotation.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -920,6 +955,8 @@ TEST_F(MindDataTestPipeline, TestRandomRotation) {
} }
TEST_F(MindDataTestPipeline, TestProjectMap) { TEST_F(MindDataTestPipeline, TestProjectMap) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProjectMap.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -972,6 +1009,9 @@ TEST_F(MindDataTestPipeline, TestProjectMap) {
} }
TEST_F(MindDataTestPipeline, TestZipSuccess) { TEST_F(MindDataTestPipeline, TestZipSuccess) {
// Testing the member zip() function
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -1000,7 +1040,7 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) {
EXPECT_NE(ds2, nullptr); EXPECT_NE(ds2, nullptr);
// Create a Zip operation on the datasets // Create a Zip operation on the datasets
ds = ds->Zip({ds, ds1, ds2}); ds = ds->Zip({ds1, ds2});
EXPECT_NE(ds, nullptr); EXPECT_NE(ds, nullptr);
// Create a Batch operation on ds // Create a Batch operation on ds
@ -1017,6 +1057,13 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) {
std::unordered_map<std::string, std::shared_ptr<Tensor>> row; std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row); iter->GetNextRow(&row);
// Check zipped column names
EXPECT_EQ(row.size(), 4);
EXPECT_NE(row.find("image"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("col1"), row.end());
EXPECT_NE(row.find("col2"), row.end());
uint64_t i = 0; uint64_t i = 0;
while (row.size() != 0) { while (row.size() != 0) {
i++; i++;
@ -1031,7 +1078,62 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) {
iter->Stop(); iter->Stop();
} }
TEST_F(MindDataTestPipeline, TestZipSuccess2) {
  // Exercises the standalone (static) Zip() API rather than the member Zip().
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess2.";

  // Two ImageFolder datasets over the same data with different sample counts
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> branch1 = ImageFolder(folder_path, true, RandomSampler(false, 9));
  EXPECT_NE(branch1, nullptr);

  std::shared_ptr<Dataset> branch2 = ImageFolder(folder_path, true, RandomSampler(false, 10));
  EXPECT_NE(branch2, nullptr);

  // Rename branch1's columns so the two zipped datasets have distinct column names
  branch1 = branch1->Rename({"image", "label"}, {"col1", "col2"});
  EXPECT_NE(branch1, nullptr);

  // Zip the two datasets via the standalone API
  std::shared_ptr<Dataset> zipped = Zip({branch1, branch2});
  EXPECT_NE(zipped, nullptr);

  // Batch with size 1
  zipped = zipped->Batch(1);
  EXPECT_NE(zipped, nullptr);

  // Build the execution tree and launch it
  std::shared_ptr<Iterator> iter = zipped->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Pull the first row and verify the zipped column set
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  EXPECT_EQ(row.size(), 4);
  EXPECT_NE(row.find("image"), row.end());
  EXPECT_NE(row.find("label"), row.end());
  EXPECT_NE(row.find("col1"), row.end());
  EXPECT_NE(row.find("col2"), row.end());

  // Drain the pipeline, counting rows
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // The shorter branch (9 samples) bounds the zipped dataset length
  EXPECT_EQ(i, 9);

  // Manually terminate the pipeline
  iter->Stop();
}
TEST_F(MindDataTestPipeline, TestZipFail) { TEST_F(MindDataTestPipeline, TestZipFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail.";
// We expect this test to fail because we are the both datasets we are zipping have "image" and "label" columns // We expect this test to fail because we are the both datasets we are zipping have "image" and "label" columns
// and zip doesn't accept datasets with same column names // and zip doesn't accept datasets with same column names
@ -1045,7 +1147,7 @@ TEST_F(MindDataTestPipeline, TestZipFail) {
EXPECT_NE(ds1, nullptr); EXPECT_NE(ds1, nullptr);
// Create a Zip operation on the datasets // Create a Zip operation on the datasets
ds = ds->Zip({ds, ds1}); ds = Zip({ds, ds1});
EXPECT_NE(ds, nullptr); EXPECT_NE(ds, nullptr);
// Create a Batch operation on ds // Create a Batch operation on ds
@ -1059,7 +1161,24 @@ TEST_F(MindDataTestPipeline, TestZipFail) {
EXPECT_EQ(iter, nullptr); EXPECT_EQ(iter, nullptr);
} }
TEST_F(MindDataTestPipeline, TestZipFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail2.";
  // Zip() must reject an empty list of input datasets.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  EXPECT_NE(ds, nullptr);

  // Zip with no inputs: validation fails and nullptr is returned
  ds = Zip({});
  EXPECT_EQ(ds, nullptr);
}
TEST_F(MindDataTestPipeline, TestRenameSuccess) { TEST_F(MindDataTestPipeline, TestRenameSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameSuccess.";
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/"; std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@ -1108,6 +1227,7 @@ TEST_F(MindDataTestPipeline, TestRenameSuccess) {
} }
TEST_F(MindDataTestPipeline, TestRenameFail) { TEST_F(MindDataTestPipeline, TestRenameFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail.";
// We expect this test to fail because input and output in Rename are not the same size // We expect this test to fail because input and output in Rename are not the same size
// Create an ImageFolder Dataset // Create an ImageFolder Dataset
@ -1127,6 +1247,7 @@ TEST_F(MindDataTestPipeline, TestRenameFail) {
TEST_F(MindDataTestPipeline, TestVOCSegmentation) { TEST_F(MindDataTestPipeline, TestVOCSegmentation) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentation."; MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentation.";
// Create a VOC Dataset // Create a VOC Dataset
std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
std::shared_ptr<Dataset> ds = VOC(folder_path, "Segmentation", "train", {}, false, SequentialSampler(0, 3)); std::shared_ptr<Dataset> ds = VOC(folder_path, "Segmentation", "train", {}, false, SequentialSampler(0, 3));
@ -1176,6 +1297,7 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentation) {
TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) { TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentationError1."; MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentationError1.";
// Create a VOC Dataset // Create a VOC Dataset
std::map<std::string, int32_t> class_index; std::map<std::string, int32_t> class_index;
class_index["car"] = 0; class_index["car"] = 0;
@ -1188,6 +1310,7 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) {
TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrMode) { TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrMode) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCInvalidTaskOrMode."; MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCInvalidTaskOrMode.";
// Create a VOC Dataset // Create a VOC Dataset
std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
std::shared_ptr<Dataset> ds_1 = VOC(folder_path, "Classification", "train", {}, false, SequentialSampler(0, 3)); std::shared_ptr<Dataset> ds_1 = VOC(folder_path, "Classification", "train", {}, false, SequentialSampler(0, 3));
@ -1201,6 +1324,7 @@ TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrMode) {
TEST_F(MindDataTestPipeline, TestVOCDetection) { TEST_F(MindDataTestPipeline, TestVOCDetection) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCDetection."; MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCDetection.";
// Create a VOC Dataset // Create a VOC Dataset
std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, SequentialSampler(0, 4)); std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, SequentialSampler(0, 4));
@ -1246,6 +1370,7 @@ TEST_F(MindDataTestPipeline, TestVOCDetection) {
TEST_F(MindDataTestPipeline, TestVOCClassIndex) { TEST_F(MindDataTestPipeline, TestVOCClassIndex) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCClassIndex."; MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCClassIndex.";
// Create a VOC Dataset // Create a VOC Dataset
std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
std::map<std::string, int32_t> class_index; std::map<std::string, int32_t> class_index;
@ -1289,3 +1414,153 @@ TEST_F(MindDataTestPipeline, TestVOCClassIndex) {
// Manually terminate the pipeline // Manually terminate the pipeline
iter->Stop(); iter->Stop();
} }
TEST_F(MindDataTestPipeline, TestConcatSuccess) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, RandomSampler(false, 9));
  EXPECT_NE(ds2, nullptr);

  // Project both datasets down to the common "image" column so the
  // concatenated children have identical column layouts.
  ds = ds->Project({"image"});
  EXPECT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Fix: verify the dataset that was just created (ds2), not ds.
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds->Concat({ds2});
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // 10 rows from ImageFolder + 9 rows from Cifar10, batch size 1 => 19 rows.
  EXPECT_EQ(i, 19);

  // Manually terminate the pipeline
  iter->Stop();
}
TEST_F(MindDataTestPipeline, TestConcatSuccess2) {
  // Test "+" operator to concat two datasets
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess2.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, RandomSampler(false, 9));
  EXPECT_NE(ds2, nullptr);

  // Project both datasets down to the common "image" column so the
  // concatenated children have identical column layouts.
  ds = ds->Project({"image"});
  EXPECT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Fix: verify the dataset that was just created (ds2), not ds.
  EXPECT_NE(ds2, nullptr);

  // Concat the two datasets via the overloaded "+" operator.
  ds = ds + ds2;
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // 10 rows from ImageFolder + 9 rows from Cifar10, batch size 1 => 19 rows.
  EXPECT_EQ(i, 19);

  // Manually terminate the pipeline
  iter->Stop();
}
TEST_F(MindDataTestPipeline, TestConcatFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail1.";
  // This case is expected to fail because the input column names of concatenated datasets are not the same

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  EXPECT_NE(ds, nullptr);

  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fix: verify the dataset that was just created (ds2), not ds.
  EXPECT_NE(ds2, nullptr);

  // Rename ds2's columns so the two children no longer share column names.
  ds2 = ds2->Rename({"image", "label"}, {"col1", "col2"});
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  // Column names of the datasets to concat do not match
  ds = ds->Concat({ds2});
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  // The column-name mismatch is only detected when the tree is built,
  // so iterator creation must fail here.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
TEST_F(MindDataTestPipeline, TestConcatFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail2.";
  // Negative case: Concat must reject an empty list of input datasets.

  // Build a small ImageFolder dataset to invoke Concat on.
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  EXPECT_NE(ds, nullptr);

  // Concat with no datasets fails parameter validation, so a null
  // dataset is returned.
  ds = ds->Concat({});
  EXPECT_EQ(ds, nullptr);
}