!31889 Add some UT pipeline test cases for C++ dataset

Merge pull request !31889 from liucunwei/dataset-lcw-0324
This commit is contained in:
i-robot 2022-03-25 03:27:37 +00:00 committed by Gitee
commit 8589335879
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 250 additions and 0 deletions

View File

@ -120,6 +120,64 @@ TEST_F(MindDataTestPipeline, TestCaltech256DatasetWithPipeline) {
iter->Stop();
}
/// Feature: Caltech256IteratorOneColumn.
/// Description: test iterator of Caltech256Dataset when only the "image" column is selected.
/// Expectation: every row contains exactly the expected image tensor shape.
TEST_F(MindDataTestPipeline, TestCaltech256IteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorOneColumn.";
  // Build a Caltech256 dataset that samples 44 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
  EXPECT_NE(ds, nullptr);

  // Batch with size 1 so each row carries a leading batch dimension.
  const int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Project down to the "image" column only; -1 means no epoch limit.
  const std::vector<std::string> columns = {"image"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Walk the whole pipeline, checking the shape of the single remaining column.
  const std::vector<int64_t> expect_image = {1, 159109};
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    for (const auto &tensor : row) {
      MS_LOG(INFO) << "image shape:" << tensor.Shape();
      EXPECT_EQ(expect_image, tensor.Shape());
    }
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 44);

  // Manually terminate the pipeline.
  iter->Stop();
}
/// Feature: Caltech256IteratorWrongColumn.
/// Description: test iterator of Caltech256Dataset with a non-existent column name.
/// Expectation: iterator creation fails and nullptr is returned.
TEST_F(MindDataTestPipeline, TestCaltech256IteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCaltech256IteratorWrongColumn.";
  // Build a Caltech256 dataset that samples 44 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = Caltech256(folder_path, std::make_shared<RandomSampler>(false, 44));
  EXPECT_NE(ds, nullptr);

  // Requesting a column that the dataset does not provide must fail.
  const std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);
}
/// Feature: Caltech256Dataset
/// Description: test getting size of Caltech256Dataset
/// Expectation: the size is correct

View File

@ -189,6 +189,66 @@ TEST_F(MindDataTestPipeline, TestIMDBBasicWithPipeline) {
iter->Stop();
}
/// Feature: IMDBIteratorOneColumn.
/// Description: test iterator of IMDBDataset when only the "text" column is selected.
/// Expectation: all 8 rows are produced and can be read.
TEST_F(MindDataTestPipeline, TestIMDBIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorOneColumn.";
  const std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
  const std::string usage = "all";  // 'train', 'test', 'all'

  // Build an IMDB dataset over the full (train + test) split.
  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
  EXPECT_NE(ds, nullptr);

  // Batch with size 1 so each row carries a leading batch dimension.
  const int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Project down to the "text" column only; -1 means no epoch limit.
  const std::vector<std::string> columns = {"text"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Walk the whole pipeline and count the rows that come out.
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    for (const auto &tensor : row) {
      MS_LOG(INFO) << "text shape:" << tensor.Shape();
    }
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 8);

  // Manually terminate the pipeline.
  iter->Stop();
}
/// Feature: IMDBIteratorWrongColumn.
/// Description: test iterator of IMDBDataset with a non-existent column name.
/// Expectation: iterator creation fails and nullptr is returned.
TEST_F(MindDataTestPipeline, TestIMDBIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIMDBIteratorWrongColumn.";
  const std::string dataset_path = datasets_root_path_ + "/testIMDBDataset";
  const std::string usage = "all";  // 'train', 'test', 'all'

  // Build an IMDB dataset over the full (train + test) split.
  std::shared_ptr<Dataset> ds = IMDB(dataset_path, usage);
  EXPECT_NE(ds, nullptr);

  // Requesting a column that the dataset does not provide must fail.
  const std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);
}
/// Feature: Test IMDB Dataset.
/// Description: read IMDB data with GetDatasetSize, GetColumnNames, GetBatchSize.
/// Expectation: the data is processed successfully.

View File

@ -119,6 +119,64 @@ TEST_F(MindDataTestPipeline, TestSemeionDatasetWithPipeline) {
iter->Stop();
}
/// Feature: SemeionIteratorOneColumn.
/// Description: test iterator of SemeionDataset when only the "image" column is selected.
/// Expectation: every row contains exactly the expected 16x16 image tensor shape.
TEST_F(MindDataTestPipeline, TestSemeionIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorOneColumn.";
  // Build a Semeion dataset that samples 5 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  EXPECT_NE(ds, nullptr);

  // Batch with size 1 so each row carries a leading batch dimension.
  const int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Project down to the "image" column only; -1 means no epoch limit.
  const std::vector<std::string> columns = {"image"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Walk the whole pipeline, checking the shape of the single remaining column.
  const std::vector<int64_t> expect_image = {1, 16, 16};
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    for (const auto &tensor : row) {
      MS_LOG(INFO) << "image shape:" << tensor.Shape();
      EXPECT_EQ(expect_image, tensor.Shape());
    }
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 5);

  // Manually terminate the pipeline.
  iter->Stop();
}
/// Feature: SemeionIteratorWrongColumn.
/// Description: test iterator of SemeionDataset with a non-existent column name.
/// Expectation: iterator creation fails and nullptr is returned.
TEST_F(MindDataTestPipeline, TestSemeionIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionIteratorWrongColumn.";
  // Build a Semeion dataset that samples 5 rows without replacement.
  const std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  EXPECT_NE(ds, nullptr);

  // Requesting a column that the dataset does not provide must fail.
  const std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);
}
/// Feature: SemeionDataset.
/// Description: read number of all samples from all files according to different versions.
/// Expectation: 10.

View File

@ -146,6 +146,80 @@ TEST_F(MindDataTestPipeline, TestWikiTextDatasetBasicWithPipeline) {
GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
/// Feature: WikiTextIteratorOneColumn.
/// Description: test iterator of WikiTextDataset with only the "text" column.
/// Expectation: get correct data.
TEST_F(MindDataTestPipeline, TestWikiTextIteratorOneColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorOneColumn.";
  // Test WikiText Dataset with single text file and many default inputs
  // Save the global configuration so it can be restored at the end of the test.
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Only select "text" column and drop others
  std::vector<std::string> columns = {"text"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns, -1);
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::vector<mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  // Each batched row of the "text" column is a rank-1 tensor holding one sample.
  // (Renamed from the misleading "expect_image" — this is a text dataset.)
  std::vector<int64_t> expect_shape = {1};
  uint64_t i = 0;
  while (row.size() != 0) {
    for (auto &v : row) {
      // Fixed: the log previously said "image shape" for a text column.
      MS_LOG(INFO) << "text shape:" << v.Shape();
      EXPECT_EQ(expect_shape, v.Shape());
    }
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }
  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline
  iter->Stop();

  // Restore the configuration. Previously the modified seed / worker count
  // leaked into subsequent test cases (the sibling pipeline test restores it).
  GlobalContext::config_manager()->set_seed(original_seed);
  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
/// Feature: WikiTextIteratorWrongColumn.
/// Description: test iterator of WikiTextDataset with wrong column.
/// Expectation: get none piece of data.
TEST_F(MindDataTestPipeline, TestWikiTextIteratorWrongColumn) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestWikiTextIteratorWrongColumn.";
  // Test WikiText Dataset with single text file and many default inputs
  // Save the global configuration so it can be restored at the end of the test.
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  std::string dataset_dir = datasets_root_path_ + "/testWikiText";
  std::shared_ptr<Dataset> ds = WikiText(dataset_dir, "test", 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Pass wrong column name: iterator creation must fail and return nullptr.
  std::vector<std::string> columns = {"digital"};
  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
  EXPECT_EQ(iter, nullptr);

  // Restore the configuration. Previously the modified seed / worker count
  // leaked into subsequent test cases (the sibling pipeline test restores it).
  GlobalContext::config_manager()->set_seed(original_seed);
  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
/// Feature: Test WikiText Dataset.
/// Description: read WikiText data and get data.
/// Expectation: the data is processed successfully.