!31889 Add some UT pipeline test cases for C++ dataset

Merge pull request !31889 from liucunwei/dataset-lcw-0324
This commit is contained in:
i-robot 2022-03-25 03:27:37 +00:00 committed by Gitee
commit 8589335879
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 250 additions and 0 deletions

View File

@ -120,6 +120,64 @@ TEST_F(MindDataTestPipeline, TestCaltech256DatasetWithPipeline) {
iter->Stop();
}
/// Feature: Caltech256IteratorOneColumn.
/// Description: test iterator of Caltech256Dataset when only the "image" column is selected.
/// Expectation: every row contains exactly the expected image tensor shape.
TEST_F(MindDataTestPipeline, TestCaltech256IteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorOneColumn.";
  // Build a Caltech256 dataset that samples 44 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
  EXPECT_NE(ds, nullptr);

  // Batch with size 1 so each row carries a leading batch dimension.
  const int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Project down to the "image" column only; -1 means no epoch limit.
  const std::vector<std::string> columns = {"image"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Walk the whole pipeline, checking the shape of the single remaining column.
  const std::vector<int64_t> expect_image = {1, 159109};
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    for (const auto &tensor : row) {
      MS_LOG(INFO) << "image shape:" << tensor.Shape();
      EXPECT_EQ(expect_image, tensor.Shape());
    }
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 44);

  // Manually terminate the pipeline.
  iter->Stop();
}
/// Feature: Caltech256IteratorWrongColumn.
/// Description: test iterator of Caltech256Dataset with a non-existent column name.
/// Expectation: iterator creation fails and nullptr is returned.
TEST_F(MindDataTestPipeline, TestCaltech256IteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorWrongColumn.";
  // Build a Caltech256 dataset that samples 44 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
  EXPECT_NE(ds, nullptr);

  // Requesting a column that the dataset does not provide must fail.
  const std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);
}
/// Feature: Caltech256Dataset
/// Description: test getting size of Caltech256Dataset
/// Expectation: the size is correct

View File

@ -189,6 +189,66 @@ TEST_F(MindDataTestPipeline, TestIMDBBasicWithPipeline) {
iter->Stop();
}
/// Feature: IMDBIteratorOneColumn.
/// Description: test iterator of IMDBDataset when only the "text" column is selected.
/// Expectation: all 8 rows are produced and can be read.
TEST_F(MindDataTestPipeline, TestIMDBIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorOneColumn.";
  const std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
  const std::string usage = "all";  // 'train', 'test', 'all'

  // Build an IMDB dataset over the full (train + test) split.
  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
  EXPECT_NE(ds, nullptr);

  // Batch with size 1 so each row carries a leading batch dimension.
  const int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Project down to the "text" column only; -1 means no epoch limit.
  const std::vector<std::string> columns = {"text"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Walk the whole pipeline and count the rows that come out.
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    for (const auto &tensor : row) {
      MS_LOG(INFO) << "text shape:" << tensor.Shape();
    }
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 8);

  // Manually terminate the pipeline.
  iter->Stop();
}
/// Feature: IMDBIteratorWrongColumn.
/// Description: test iterator of IMDBDataset with a non-existent column name.
/// Expectation: iterator creation fails and nullptr is returned.
TEST_F(MindDataTestPipeline, TestIMDBIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorWrongColumn.";
  const std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
  const std::string usage = "all";  // 'train', 'test', 'all'

  // Build an IMDB dataset over the full (train + test) split.
  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
  EXPECT_NE(ds, nullptr);

  // Requesting a column that the dataset does not provide must fail.
  const std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);
}
/// Feature: Test IMDB Dataset.
/// Description: read IMDB data with GetDatasetSize, GetColumnNames, GetBatchSize.
/// Expectation: the data is processed successfully.

View File

@ -119,6 +119,64 @@ TEST_F(MindDataTestPipeline, TestSemeionDatasetWithPipeline) {
iter->Stop();
}
/// Feature: SemeionIteratorOneColumn.
/// Description: test iterator of SemeionDataset when only the "image" column is selected.
/// Expectation: every row contains exactly the expected 16x16 image tensor shape.
TEST_F(MindDataTestPipeline, TestSemeionIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorOneColumn.";
  // Build a Semeion dataset that samples 5 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  EXPECT_NE(ds, nullptr);

  // Batch with size 1 so each row carries a leading batch dimension.
  const int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Project down to the "image" column only; -1 means no epoch limit.
  const std::vector<std::string> columns = {"image"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Walk the whole pipeline, checking the shape of the single remaining column.
  const std::vector<int64_t> expect_image = {1, 16, 16};
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    for (const auto &tensor : row) {
      MS_LOG(INFO) << "image shape:" << tensor.Shape();
      EXPECT_EQ(expect_image, tensor.Shape());
    }
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 5);

  // Manually terminate the pipeline.
  iter->Stop();
}
/// Feature: SemeionIteratorWrongColumn.
/// Description: test iterator of SemeionDataset with a non-existent column name.
/// Expectation: iterator creation fails and nullptr is returned.
TEST_F(MindDataTestPipeline, TestSemeionIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorWrongColumn.";
  // Build a Semeion dataset that samples 5 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  EXPECT_NE(ds, nullptr);

  // Requesting a column that the dataset does not provide must fail.
  const std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);
}
/// Feature: SemeionDataset.
/// Description: read number of all samples from all files according to different versions.
/// Expectation: 10.

View File

@ -146,6 +146,80 @@ TEST_F(MindDataTestPipeline, TestWikiTextDatasetBasicWithPipeline) {
GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
/// Feature: WikiTextIteratorOneColumn.
/// Description: test iterator of WikiTextDataset with only the "text" column.
/// Expectation: get correct data.
TEST_F(MindDataTestPipeline, TestWikiTextIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorOneColumn.";
  // Test WikiText Dataset with single text file and many default inputs
  // Save the global configuration so it can be restored at the end of the test.
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Only select "text" column and drop others
  std::vector<std::string> columns = {"text"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  // Each batched row of the "text" column is a rank-1 tensor holding one sample.
  // (Renamed from the misleading "expect_image" — this is a text dataset.)
  std::vector<int64_t> expect_shape = {1};
  uint64_t i = 0;
  while (row.size() != 0) {
    for (auto &v : row) {
      // Fixed: the log previously said "image shape" for a text column.
      MS_LOG(INFO) << "text shape:" << v.Shape();
      EXPECT_EQ(expect_shape, v.Shape());
    }
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }
  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline
  iter->Stop();

  // Restore the configuration. Previously the modified seed / worker count
  // leaked into subsequent test cases (the sibling pipeline test restores it).
  GlobalContext::config_manager()->set_seed(original_seed);
  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
/// Feature: WikiTextIteratorWrongColumn.
/// Description: test iterator of WikiTextDataset with wrong column.
/// Expectation: get none piece of data.
TEST_F(MindDataTestPipeline, TestWikiTextIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorWrongColumn.";
  // Test WikiText Dataset with single text file and many default inputs
  // Save the global configuration so it can be restored at the end of the test.
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Pass wrong column name: iterator creation must fail and return nullptr.
  std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);

  // Restore the configuration. Previously the modified seed / worker count
  // leaked into subsequent test cases (the sibling pipeline test restores it).
  GlobalContext::config_manager()->set_seed(original_seed);
  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
/// Feature: Test WikiText Dataset.
/// Description: read WikiText data and get data.
/// Expectation: the data is processed successfully.