forked from mindspore-Ecosystem/mindspore
!31889 Add some ut pipeline test case for C++ dataset
Merge pull request !31889 from liucunwei/dataset-lcw-0324
This commit is contained in:
commit
8589335879
|
@ -120,6 +120,64 @@ TEST_F(MindDataTestPipeline, TestCaltech256DatasetWithPipeline) {
|
|||
iter->Stop();
|
||||
}
|
||||
|
||||
/// Feature: Caltech256IteratorOneColumn.
|
||||
/// Description: test iterator of Caltech256Dataset with only the "image" column.
|
||||
/// Expectation: get correct data.
|
||||
TEST_F(MindDataTestPipeline, TestCaltech256IteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorOneColumn.";
  // Create a Caltech256 Dataset.
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Only select "image" column and drop others
  std::vector<std::string> columns = {"image"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  // Fatal assertion: iter is dereferenced by every statement that follows.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  std::vector<int64_t> expect_image = {1, 159109};

  // An empty row marks the end of the iteration; count the rows seen before it.
  uint64_t i = 0;
  while (row.size() != 0) {
    for (auto &v : row) {
      MS_LOG(INFO) << "image shape:" << v.Shape();
      EXPECT_EQ(expect_image, v.Shape());
    }
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // The sampler above was created with 44 samples and batch_size is 1.
  EXPECT_EQ(i, 44U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
/// Feature: Caltech256IteratorWrongColumn.
|
||||
/// Description: test iterator of Caltech256Dataset with wrong column.
|
||||
/// Expectation: iterator creation fails and CreateIterator returns nullptr.
|
||||
TEST_F(MindDataTestPipeline, TestCaltech256IteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorWrongColumn.";
  // Create a Caltech256 Dataset.
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Pass wrong column name
  std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  // Requesting a column name that is not "image" is expected to yield no iterator.
  EXPECT_EQ(iter, nullptr);
}
|
||||
|
||||
/// Feature: Caltech256Dataset
|
||||
/// Description: test getting size of Caltech256Dataset
|
||||
/// Expectation: the size is correct
|
||||
|
|
|
@ -189,6 +189,66 @@ TEST_F(MindDataTestPipeline, TestIMDBBasicWithPipeline) {
|
|||
iter->Stop();
|
||||
}
|
||||
|
||||
/// Feature: IMDBIteratorOneColumn.
|
||||
/// Description: test iterator of IMDBDataset with only the "text" column.
|
||||
/// Expectation: get correct data.
|
||||
TEST_F(MindDataTestPipeline, TestIMDBIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorOneColumn.";
  std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
  std::string usage = "all";  // 'train', 'test', 'all'

  // Create a IMDB Dataset
  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Only select "text" column and drop others
  std::vector<std::string> columns = {"text"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  // Fatal assertion: iter is dereferenced by every statement that follows.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the iteration; count the rows seen before it.
  uint64_t i = 0;
  while (row.size() != 0) {
    for (auto &v : row) {
      MS_LOG(INFO) << "text shape:" << v.Shape();
    }
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  EXPECT_EQ(i, 8U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
/// Feature: IMDBIteratorWrongColumn.
|
||||
/// Description: test iterator of IMDBDataset with wrong column.
|
||||
/// Expectation: iterator creation fails and CreateIterator returns nullptr.
|
||||
TEST_F(MindDataTestPipeline, TestIMDBIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorWrongColumn.";
  std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
  std::string usage = "all";  // 'train', 'test', 'all'

  // Create a IMDB Dataset
  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Pass wrong column name
  std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  // Requesting an unknown column name is expected to yield no iterator.
  EXPECT_EQ(iter, nullptr);
}
|
||||
|
||||
/// Feature: Test IMDB Dataset.
|
||||
/// Description: read IMDB data with GetDatasetSize, GetColumnNames, GetBatchSize.
|
||||
/// Expectation: the data is processed successfully.
|
||||
|
|
|
@ -119,6 +119,64 @@ TEST_F(MindDataTestPipeline, TestSemeionDatasetWithPipeline) {
|
|||
iter->Stop();
|
||||
}
|
||||
|
||||
/// Feature: SemeionIteratorOneColumn.
|
||||
/// Description: test iterator of SemeionDataset with only the "image" column.
|
||||
/// Expectation: get correct data.
|
||||
TEST_F(MindDataTestPipeline, TestSemeionIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorOneColumn.";
  // Create a Semeion Dataset.
  std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Only select "image" column and drop others
  std::vector<std::string> columns = {"image"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  // Fatal assertion: iter is dereferenced by every statement that follows.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  std::vector<int64_t> expect_image = {1, 16, 16};

  // An empty row marks the end of the iteration; count the rows seen before it.
  uint64_t i = 0;
  while (row.size() != 0) {
    for (auto &v : row) {
      MS_LOG(INFO) << "image shape:" << v.Shape();
      EXPECT_EQ(expect_image, v.Shape());
    }
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // The sampler above was created with 5 samples and batch_size is 1.
  EXPECT_EQ(i, 5U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
/// Feature: SemeionIteratorWrongColumn.
|
||||
/// Description: test iterator of SemeionDataset with wrong column.
|
||||
/// Expectation: iterator creation fails and CreateIterator returns nullptr.
|
||||
TEST_F(MindDataTestPipeline, TestSemeionIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorWrongColumn.";
  // Create a Semeion Dataset.
  std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Pass wrong column name
  std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  // Requesting an unknown column name is expected to yield no iterator.
  EXPECT_EQ(iter, nullptr);
}
|
||||
|
||||
/// Feature: SemeionDataset.
|
||||
/// Description: read number of all samples from all files according to different versions.
|
||||
/// Expectation: 10.
|
||||
|
|
|
@ -146,6 +146,80 @@ TEST_F(MindDataTestPipeline, TestWikiTextDatasetBasicWithPipeline) {
|
|||
GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
|
||||
}
|
||||
|
||||
/// Feature: WikiTextIteratorOneColumn.
|
||||
/// Description: test iterator of WikiTextDataset with only the "text" column.
|
||||
/// Expectation: get correct data.
|
||||
TEST_F(MindDataTestPipeline, TestWikiTextIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorOneColumn.";
  // Test WikiText Dataset with single text file and many default inputs

  // Set configuration
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;

  // Put the global configuration back on every exit path (including early returns from ASSERT_*),
  // so the overrides below do not leak into tests that run afterwards.
  struct ConfigRestorer {
    uint32_t seed;
    uint32_t num_parallel_workers;
    ~ConfigRestorer() {
      GlobalContext::config_manager()->set_seed(seed);
      GlobalContext::config_manager()->set_num_parallel_workers(num_parallel_workers);
    }
  } config_restorer{original_seed, original_num_parallel_workers};

  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Only select "text" column and drop others
  std::vector<std::string> columns = {"text"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  // Fatal assertion: iter is dereferenced by every statement that follows.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  std::vector<int64_t> expect_text = {1};

  // An empty row marks the end of the iteration; count the rows seen before it.
  uint64_t i = 0;
  while (row.size() != 0) {
    for (auto &v : row) {
      MS_LOG(INFO) << "text shape:" << v.Shape();
      EXPECT_EQ(expect_text, v.Shape());
    }
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  EXPECT_EQ(i, 3U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
/// Feature: WikiTextIteratorWrongColumn.
|
||||
/// Description: test iterator of WikiTextDataset with wrong column.
|
||||
/// Expectation: iterator creation fails and CreateIterator returns nullptr.
|
||||
TEST_F(MindDataTestPipeline, TestWikiTextIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorWrongColumn.";
  // Test WikiText Dataset with single text file and many default inputs

  // Set configuration
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;

  // Put the global configuration back on every exit path (including early returns from ASSERT_*),
  // so the overrides below do not leak into tests that run afterwards.
  struct ConfigRestorer {
    uint32_t seed;
    uint32_t num_parallel_workers;
    ~ConfigRestorer() {
      GlobalContext::config_manager()->set_seed(seed);
      GlobalContext::config_manager()->set_num_parallel_workers(num_parallel_workers);
    }
  } config_restorer{original_seed, original_num_parallel_workers};

  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced right below, so continuing with nullptr would crash the test binary.
  ASSERT_NE(ds, nullptr);

  // Pass wrong column name
  std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  // Requesting an unknown column name is expected to yield no iterator.
  EXPECT_EQ(iter, nullptr);
}
|
||||
|
||||
/// Feature: Test WikiText Dataset.
|
||||
/// Description: read WikiText data and get data.
|
||||
/// Expectation: the data is processed successfully.
|
||||
|
|
Loading…
Reference in New Issue