From 098ca49dd30d679b8a91e9f34f2ae1014080a231 Mon Sep 17 00:00:00 2001 From: hetshah Date: Tue, 23 Nov 2021 22:06:41 +0000 Subject: [PATCH] remove instances of datasetiterator from old tests and move coverage to new method --- tests/ut/cpp/dataset/batch_op_test.cc | 255 +----- tests/ut/cpp/dataset/c_api_cache_test.cc | 91 +++ .../dataset/c_api_dataset_manifest_test.cc | 35 + .../ut/cpp/dataset/c_api_dataset_ops_test.cc | 554 ++++++++++++- .../dataset/c_api_dataset_randomdata_test.cc | 47 ++ .../dataset/c_api_dataset_tfrecord_test.cc | 66 ++ tests/ut/cpp/dataset/cache_op_test.cc | 223 ------ tests/ut/cpp/dataset/manifest_op_test.cc | 210 ----- tests/ut/cpp/dataset/map_op_test.cc | 752 ------------------ tests/ut/cpp/dataset/mind_record_op_test.cc | 270 ------- tests/ut/cpp/dataset/random_data_op_test.cc | 417 ---------- tests/ut/cpp/dataset/tfReader_op_test.cc | 368 --------- 12 files changed, 799 insertions(+), 2489 deletions(-) delete mode 100644 tests/ut/cpp/dataset/manifest_op_test.cc delete mode 100644 tests/ut/cpp/dataset/map_op_test.cc delete mode 100644 tests/ut/cpp/dataset/random_data_op_test.cc diff --git a/tests/ut/cpp/dataset/batch_op_test.cc b/tests/ut/cpp/dataset/batch_op_test.cc index 1c641f7d0be..e2269a6a946 100644 --- a/tests/ut/cpp/dataset/batch_op_test.cc +++ b/tests/ut/cpp/dataset/batch_op_test.cc @@ -46,260 +46,18 @@ class MindDataTestBatchOp : public UT::DatasetOpTesting { protected: }; -TEST_F(MindDataTestBatchOp, TestSimpleBatch) { - std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; - bool success = false; - const std::shared_ptr &op = Batch(12); - EXPECT_EQ(op->Name(), "BatchOp"); - - auto tree = Build({TFReader(schema_file), op}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - } else { - int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; - 
de::DatasetIterator di(tree); - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - std::shared_ptr t; - rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)payload, &t); - EXPECT_TRUE(rc.IsOk()); - // verify the actual data in Tensor is correct - EXPECT_EQ(*t == *tensor_map["col_sint64"], true); - // change what's in Tensor and verify this time the data is incorrect1; - EXPECT_EQ(*t == *tensor_map["col_sint16"], false); - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - if (tensor_map.size() == 0) { - success = true; - } - } - EXPECT_EQ(success, true); -} - -TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) { - std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; - bool success = false; - auto op1 = TFReader(schema_file); - auto op2 = Repeat(2); - auto op3 = Batch(7, true); - op1->SetTotalRepeats(2); - op1->SetNumRepeatsPerEpoch(2); - auto tree = Build({op1, op2, op3}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - } else { - int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, - -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; - de::DatasetIterator di(tree); - std::shared_ptr t1, t2, t3; - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)payload, &t1); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 7), &t2); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 2), &t3); - EXPECT_TRUE(rc.IsOk()); - - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - 
EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t3 == *(tensor_map["col_sint64"]), true); // third call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - if (tensor_map.size() == 0) { - success = true; - } - } - EXPECT_EQ(success, true); -} - -TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) { - std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; - bool success = false; - auto op1 = TFReader(schema_file); - auto op2 = Repeat(2); - auto op3 = Batch(7, false); - op1->SetTotalRepeats(2); - op1->SetNumRepeatsPerEpoch(2); - auto tree = Build({op1, op2, op3}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - } else { - int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, - -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; - de::DatasetIterator di(tree); - std::shared_ptr t1, t2, t3, t4; - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)payload, &t1); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 7), &t2); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 2), &t3); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 9), &t4); - EXPECT_TRUE(rc.IsOk()); - - TensorMap tensor_map; - rc 
= di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t3 == *(tensor_map["col_sint64"]), true); // third call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t4 == *(tensor_map["col_sint64"]), true); // last call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - if (tensor_map.size() == 0) { - success = true; - } - } - EXPECT_EQ(success, true); -} - -TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) { - std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; - bool success = false; - auto op1 = TFReader(schema_file); - auto op2 = Batch(7, false); - auto op3 = Repeat(2); - op1->SetTotalRepeats(2); - op1->SetNumRepeatsPerEpoch(2); - op2->SetTotalRepeats(2); - op2->SetNumRepeatsPerEpoch(2); - auto tree = Build({op1, op2, op3}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - } else { - int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, - -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; - de::DatasetIterator di(tree); - std::shared_ptr t1, t2; - rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)payload, &t1); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 7), &t2); - EXPECT_TRUE(rc.IsOk()); - - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t1 == 
*(tensor_map["col_sint64"]), true); // first call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // third call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // last call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - if (tensor_map.size() == 0) { - success = true; - } - } - EXPECT_EQ(success, true); -} - -TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) { - std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; - bool success = false; - auto op1 = TFReader(schema_file); - auto op2 = Batch(5, true); - auto op3 = Repeat(2); - op1->SetTotalRepeats(2); - op1->SetNumRepeatsPerEpoch(2); - op2->SetTotalRepeats(2); - op2->SetNumRepeatsPerEpoch(2); - auto tree = Build({op1, op2, op3}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - } else { - int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, - -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; - de::DatasetIterator di(tree); - std::shared_ptr t1, t2; - rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)payload, &t1); - EXPECT_TRUE(rc.IsOk()); - rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64), - (unsigned char *)(payload + 5), &t2); - EXPECT_TRUE(rc.IsOk()); - - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() - - rc = 
di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // third call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // last call to getNext() - - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - if (tensor_map.size() == 0) { - success = true; - } - } - EXPECT_EQ(success, true); -} - -TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) { +// This test has been disabled because PadInfo is not currently supported in the C++ API. +// Feature: Test Batch op with padding on TFReader +// Description: Create Batch operation with padding on a TFReader dataset +// Expectation: The data within the created object should match the expected data +TEST_F(MindDataTestBatchOp, DISABLED_TestSimpleBatchPadding) { std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; PadInfo m; std::shared_ptr pad_value; Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_value); pad_value->SetItemAt({}, -1); m.insert({"col_1d", std::make_pair(TensorShape({4}), pad_value)}); + /* std::shared_ptr config_manager = GlobalContext::config_manager(); auto op_connector_size = config_manager->op_connector_size(); @@ -309,6 +67,7 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) { pybind11::function batch_size_func; pybind11::function batch_map_func; */ + int32_t batch_size = 12; bool drop = false; std::shared_ptr op = Batch(batch_size, drop, m); diff --git a/tests/ut/cpp/dataset/c_api_cache_test.cc b/tests/ut/cpp/dataset/c_api_cache_test.cc index 343c86afff1..d23188c623d 100644 --- a/tests/ut/cpp/dataset/c_api_cache_test.cc +++ b/tests/ut/cpp/dataset/c_api_cache_test.cc @@ -944,3 +944,94 @@ TEST_F(MindDataTestCacheOp, 
DISABLED_TestCApiCacheShareFailure1) { std::shared_ptr iter2 = ds2->CreateIterator(); EXPECT_EQ(iter2, nullptr); } + +// Feature: Test RandomData with Cache and Repeat +// Description: Iterate through dataset and count rows +// Expectation: There should be 200 rows in the dataset +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi1) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a RandomDataset + std::shared_ptr schema = Schema(); + + ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {640, 480, 3})); + ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {})); + std::shared_ptr ds = RandomData(50, schema, {}, some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 4; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 200); + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test RandomData with Cache and Repeat +// Description: Set mem_sz such that spill occurs, iterate through dataset and count rows +// Expectation: There should be 40 rows in the dataset +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataSpillCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + // Create cache with mem_sz=4 and spill=true + std::shared_ptr some_cache = CreateDatasetCache(env_session, 4, true); + EXPECT_NE(some_cache, nullptr); + + // Create a RandomDataset + std::shared_ptr schema = Schema(); + + ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {640, 480, 3})); + ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {})); + std::shared_ptr ds = RandomData(10, schema, {}, some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 4; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 40); + + // Manually terminate the pipeline + iter->Stop(); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc b/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc index 658e15a2edc..36d476a05e3 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_manifest_test.cc @@ -419,3 +419,38 @@ TEST_F(MindDataTestPipeline, TestManifestWithNullSamplerError) { // Expect failure: invalid Manifest input, sampler cannot be nullptr EXPECT_EQ(iter, nullptr); } + +// Feature: Test SubsetRandomSampler with Manifest +// Description: Use SubsetRandomSampler with 1 index given, iterate through dataset and count rows +// Expectation: There should be 1 row in the dataset +TEST_F(MindDataTestPipeline, TestManifestSubsetRandomSampler) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestSubsetRandomSampler."; + + std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json"; + std::vector indices = {1}; + // Create a Manifest Dataset + std::shared_ptr ds = Manifest(file_path, "train", std::make_shared(indices)); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc index fafead805a6..2371ef058e8 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc @@ -19,10 +19,125 @@ #include "minddata/dataset/engine/ir/datasetops/dataset_node.h" #include "minddata/dataset/include/dataset/datasets.h" #include "minddata/dataset/include/dataset/vision.h" +#include "minddata/dataset/kernels/ir/data/transforms_ir.h" using namespace mindspore::dataset; using mindspore::dataset::Tensor; +namespace mindspore { +namespace dataset { +namespace test { +class NoOp : public TensorOp { + public: + NoOp(){}; + + ~NoOp(){}; + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override { + *output = std::move(input); + return Status::OK(); + }; + + void Print(std::ostream &out) const override { out << "NoOp"; }; + + std::string Name() const override { return kNoOp; } +}; + +class ThreeToOneOp : public TensorOp { + public: + ThreeToOneOp(){}; + + ~ThreeToOneOp(){}; + + uint32_t NumInput() override { + uint32_t numInput = 3; + return numInput; + } + + // Compute function that holds the actual implementation of the operation. 
+ Status Compute(const TensorRow &input, TensorRow *output) override { + output->push_back(input[0]); + return Status::OK(); + }; + + void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }; + + std::string Name() const override { return "ThreeToOneOp"; } +}; + +class OneToThreeOp : public TensorOp { + public: + OneToThreeOp(){}; + + ~OneToThreeOp(){}; + + uint32_t NumOutput() override { + uint32_t numOutput = 3; + return numOutput; + } + + // Compute function that holds the actual implementation of the operation. + // Simply pushing the same shared pointer of the first element of input vector three times. + Status Compute(const TensorRow &input, TensorRow *output) override { + output->push_back(input[0]); + output->push_back(input[0]); + output->push_back(input[0]); + return Status::OK(); + }; + + void Print(std::ostream &out) const override { out << "OneToThreeOp"; }; + + std::string Name() const override { return "OneToThreeOp"; }; +}; + +class NoTransform final : public TensorTransform { + public: + explicit NoTransform() {} + ~NoTransform() = default; + + protected: + std::shared_ptr Parse() override { + return std::make_shared(std::make_shared()); + } + + private: + struct Data; + std::shared_ptr data_; +}; + +class ThreeToOneTransform final : public TensorTransform { + public: + explicit ThreeToOneTransform() {} + ~ThreeToOneTransform() = default; + + protected: + std::shared_ptr Parse() override { + return std::make_shared(std::make_shared()); + } + + private: + struct Data; + std::shared_ptr data_; +}; + +class OneToThreeTransform final : public TensorTransform { + public: + explicit OneToThreeTransform() {} + ~OneToThreeTransform() = default; + + protected: + std::shared_ptr Parse() override { + return std::make_shared(std::make_shared()); + } + + private: + struct Data; + std::shared_ptr data_; +}; +} // namespace test +} // namespace dataset +} // namespace mindspore + class MindDataTestPipeline : public UT::DatasetOpTesting { protected: 
}; @@ -2217,4 +2332,441 @@ TEST_F(MindDataTestPipeline, TestTFRecordZip) { // Manually terminate the pipeline iter->Stop(); -} \ No newline at end of file +} + +// Feature: Test Repeat and Map with decode and resize ops on TFRecord +// Description: Iterate through dataset and count the number of rows and check the shape of the image data +// Expectation: There should be 6 rows in the dataset and shape is {30,30} +TEST_F(MindDataTestPipeline, TestTFRecordDecodeRepeatResize) { + MS_LOG(INFO) << "Doing MindDataTestPipeline.TestTFRecordDecodeRepeatResize"; + + // Create a TFRecord Dataset + std::string file_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; + std::shared_ptr ds = TFRecord({file_path}, "", {"image", "label"}); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::vector size = {30,30}; + std::shared_ptr decode_op = std::make_shared(); + std::shared_ptr resize_op = std::make_shared(size); + EXPECT_NE(decode_op, nullptr); + EXPECT_NE(resize_op, nullptr); + + // Create a Map operation on ds + // {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp. + ds = ds->Map({decode_op, resize_op}, {}, {}, {"image"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + // 'label' is dropped during the project op + EXPECT_EQ(row.find("label"), row.end()); + // 'image' column should still exist + EXPECT_NE(row.find("image"), row.end()); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); + EXPECT_EQ(image.Shape()[0], 30); + EXPECT_EQ(image.Shape()[1], 30); + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 6); + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Batch on TFRecord +// Description: Iterate through dataset, count the number of rows and verify the data in the row +// Expectation: There should be 1 row in the dataset and the data should match the expected data +TEST_F(MindDataTestPipeline, TestBatch) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatch."; + + // Create a TFRecord Dataset + std::string file_path = datasets_root_path_ + "/testBatchDataset/test.data"; + std::vector files = {file_path}; + std::shared_ptr ds = TFRecord(files, nullptr, {}, 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 12; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector data = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; + + uint64_t i = 0; + while (row.size() != 0) { + i++; + + auto this_row = row["col_sint64"]; + auto value = this_row.Data(); + int64_t *p = (int64_t *)value.get(); + for (size_t j = 0; j < data.size(); j++) { + EXPECT_EQ(p[j], data[j]); + } + + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} + +void TestRepeatBatch(bool drop, uint64_t expected_rows, std::string datasets_root_path) { + // Create a TFRecord Dataset + std::string file_path = datasets_root_path + "/testBatchDataset/test.data"; + std::shared_ptr ds = TFRecord({file_path}); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 7; + ds = ds->Batch(batch_size, drop); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, expected_rows); + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Repeat and Batch on TFRecord +// Description: Apply repeat then batch with drop on and off, count rows in the dataset +// Expectation: The number of rows should equal the expected number of rows +TEST_F(MindDataTestPipeline, TestRepeatBatchDrop) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatBatchDrop."; + TestRepeatBatch(true, 3, datasets_root_path_); + TestRepeatBatch(false, 4, datasets_root_path_); +} + +void TestBatchRepeat(bool drop, uint64_t expected_rows, std::string datasets_root_path) { + // Create a TFRecord Dataset + std::string file_path = datasets_root_path + "/testBatchDataset/test.data"; + std::shared_ptr ds = TFRecord({file_path}); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 7; + ds = ds->Batch(batch_size, drop); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, expected_rows); + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Batch and Repeat on TFRecord +// Description: Apply batch then repeat with drop on and off, count rows in the dataset +// Expectation: The number of rows should equal the expected number of rows +TEST_F(MindDataTestPipeline, TestBatchDropRepeat) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatchDropRepeat."; + TestBatchRepeat(true, 2, datasets_root_path_); + TestBatchRepeat(false, 4, datasets_root_path_); +} + +// Feature: Test Map on TFRecord +// Description: Apply Map with a TensorOp that does nothing but swaps input columns with output column +// Expectation: "Image" column is replaced with "X" +TEST_F(MindDataTestPipeline, TestMap) { + MS_LOG(INFO) << "Doing MindDataTestPipeline.TestMap"; + + // Create a TFRecord Dataset + std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data"; + std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json"; + std::shared_ptr ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"}, + 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr no_op = std::make_shared(); + EXPECT_NE(no_op, nullptr); + + // Create a Map operation on ds + ds = ds->Map({no_op}, {"image"}, {"X"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + while (row.size() != 0) { + EXPECT_EQ(row.find("image"), row.end()); + EXPECT_NE(row.find("label"), row.end()); + EXPECT_NE(row.find("X"), row.end()); + EXPECT_NE(row.find("A"), row.end()); + EXPECT_NE(row.find("B"), row.end()); + + ASSERT_OK(iter->GetNextRow(&row)); + } + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Map on TFRecord +// Description: Apply Map with a TensorOp that swaps 3 input columns with 1 output column +// Expectation: "Image", "A", "B" are replaced with "X" +TEST_F(MindDataTestPipeline, Test3to1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline.Test3to1"; + + // Create a TFRecord Dataset + std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data"; + std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json"; + std::shared_ptr ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"}, + 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr three_to_one_op = std::make_shared(); + EXPECT_NE(three_to_one_op, nullptr); + + // Create a Map operation on ds + ds = ds->Map({three_to_one_op}, {"image", "A", "B"}, {"X"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + while (row.size() != 0) { + EXPECT_EQ(row.find("image"), row.end()); + EXPECT_NE(row.find("label"), row.end()); + EXPECT_NE(row.find("X"), row.end()); + EXPECT_EQ(row.find("A"), row.end()); + EXPECT_EQ(row.find("B"), row.end()); + + ASSERT_OK(iter->GetNextRow(&row)); + } + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Map on TFRecord +// Description: Apply Map with a TensorOp that swaps 1 input column with 3 output columns +// Expectation: "Image" is replaced with "X", "Y", "Z" +TEST_F(MindDataTestPipeline, Test1to3) { + MS_LOG(INFO) << "Doing MindDataTestPipeline.Test1to3"; + + // Create a TFRecord Dataset + std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data"; + std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json"; + std::shared_ptr ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"}, + 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr one_to_three_op = std::make_shared(); + EXPECT_NE(one_to_three_op, nullptr); + + // Create a Map operation on ds + ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"}, {"X", "Y", "Z", "label", "A", "B"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + while (row.size() != 0) { + EXPECT_EQ(row.find("image"), row.end()); + EXPECT_NE(row.find("label"), row.end()); + EXPECT_NE(row.find("A"), row.end()); + EXPECT_NE(row.find("B"), row.end()); + EXPECT_NE(row.find("X"), row.end()); + EXPECT_NE(row.find("Y"), row.end()); + EXPECT_NE(row.find("Z"), row.end()); + + EXPECT_EQ(row["X"].Shape(), std::vector({3, 4, 2})); + EXPECT_EQ(row["Y"].Shape(), std::vector({3, 4, 2})); + EXPECT_EQ(row["Z"].Shape(), std::vector({3, 4, 2})); + EXPECT_EQ(row["A"].Shape(), std::vector({1, 13, 14, 12})); + EXPECT_EQ(row["B"].Shape(), std::vector({9})); + + ASSERT_OK(iter->GetNextRow(&row)); + } + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Map on TFRecord +// Description: Apply 3to1 and then 1to3 to replace 3 input columns with 3 output columns +// Expectation: "Image", "A", "B" are replaced with "X", "Y", "Z" +TEST_F(MindDataTestPipeline, TestMultiTensorOp) { + MS_LOG(INFO) << "Doing MindDataTestPipeline.TestMultiTensorOp"; + + // Create a TFRecord Dataset + std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data"; + std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json"; + std::shared_ptr ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"}, + 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr three_to_one_op = std::make_shared(); + std::shared_ptr one_to_three_op = std::make_shared(); + EXPECT_NE(one_to_three_op, nullptr); + EXPECT_NE(three_to_one_op, nullptr); + + // Create a Map operation on ds + ds = ds->Map({three_to_one_op, one_to_three_op}, {"image", "A", "B"}, {"X", "Y", "Z"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger 
the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + while (row.size() != 0) { + EXPECT_EQ(row.find("image"), row.end()); + EXPECT_NE(row.find("label"), row.end()); + EXPECT_EQ(row.find("A"), row.end()); + EXPECT_EQ(row.find("B"), row.end()); + EXPECT_NE(row.find("X"), row.end()); + EXPECT_NE(row.find("Y"), row.end()); + EXPECT_NE(row.find("Z"), row.end()); + + EXPECT_EQ(row["X"].Shape(), std::vector({3, 4, 2})); + EXPECT_EQ(row["Y"].Shape(), std::vector({3, 4, 2})); + EXPECT_EQ(row["Z"].Shape(), std::vector({3, 4, 2})); + + ASSERT_OK(iter->GetNextRow(&row)); + } + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test Repeat and Map on TFRecord +// Description: Apply Map with NoOp and Repeat with num_repeats=3, iterate through dataset and count rows +// Expectation: There should be 30 rows in the dataset +TEST_F(MindDataTestPipeline, TestTFReaderRepeatMap) { + MS_LOG(INFO) << "Doing MindDataTestPipeline.TestTFReaderRepeatMap"; + + // Create a TFRecord Dataset + std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data"; + std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json"; + std::shared_ptr ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"}, + 0, ShuffleMode::kFalse); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr no_op = std::make_shared(); + EXPECT_NE(no_op, nullptr); + + // Create a Map operation on ds + ds = ds->Map({no_op}, {"label"}, {}); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 3; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // iterate over the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + ASSERT_OK(iter->GetNextRow(&row)); + } + + EXPECT_EQ(i, 30); + + // Manually terminate the pipeline + iter->Stop(); +} diff --git a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc index 3564b107229..cf8b8edcbc6 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc @@ -475,6 +475,53 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic7) { GlobalContext::config_manager()->set_seed(curr_seed); } +// Feature: Test Repeat and Shuffle on RandomData +// Description: Apply operations, iterate through dataset and count rows +// Expectation: There should be 30 rows in the dataset +TEST_F(MindDataTestPipeline, TestRandomDatasetBasic8) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetBasic8."; + + // Create a RandomDataset + u_int32_t curr_seed = GlobalContext::config_manager()->seed(); + GlobalContext::config_manager()->set_seed(246); + + std::string SCHEMA_FILE = datasets_root_path_ + "/testRandomData/datasetSchema2.json"; + std::shared_ptr ds = RandomData(10, SCHEMA_FILE); + EXPECT_NE(ds, nullptr); + + // Create a Shuffle operation on ds + int32_t shuffle_size = 4; + ds = ds->Shuffle(shuffle_size); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 3; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + // Count the rows produced by the pipeline + uint64_t i = 0; + while (row.size() != 0) { + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 30); + + // Manually terminate the pipeline + iter->Stop(); + GlobalContext::config_manager()->set_seed(curr_seed); +} + TEST_F(MindDataTestPipeline, TestRandomDatasetUInt8) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetUInt8."; diff --git a/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc b/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc index 6fcaa5d2c74..5112cf167dd 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_tfrecord_test.cc @@ -520,3 +520,69 @@ TEST_F(MindDataTestPipeline, TestIncorrectTFrecordFile) { auto itr = ds->CreateIterator(); EXPECT_EQ(itr, nullptr); } + +// Feature: Test TFRecord with a schema file +// Description: Create TFRecord with datasetSchema1Row.json +// Expectation: There should be 1 row in the dataset +TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic1Row) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordDatasetBasic."; + + // Create a TFRecord Dataset + std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; + std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json"; + std::shared_ptr ds = TFRecord({file_path}, schema_path, {}, 0); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 1); + + // Manually terminate the pipeline + iter->Stop(); +} + +// Feature: Test TFRecord with a schema file +// Description: Create TFRecord with datasetSchema7Rows.json +// Expectation: There should be 7 rows in the dataset +TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic7Row) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordDatasetBasic."; + + // Create a TFRecord Dataset + std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; + std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json"; + std::shared_ptr ds = TFRecord({file_path}, schema_path, {}, 0); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+ std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + uint64_t i = 0; + while (row.size() != 0) { + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 7); + + // Manually terminate the pipeline + iter->Stop(); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/cache_op_test.cc b/tests/ut/cpp/dataset/cache_op_test.cc index 08db6d8f305..0e3b2dd9b39 100644 --- a/tests/ut/cpp/dataset/cache_op_test.cc +++ b/tests/ut/cpp/dataset/cache_op_test.cc @@ -214,229 +214,6 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestConcurrencyRequest) { ASSERT_TRUE(rc.IsOk()); } -// Simple test with a repeated cache op over random data producer -// -// RepeatOp -// | -// CacheOp -// | -// RandomDataOp -// -TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) { - // Clear the rc of the master thread if any - (void)TaskManager::GetMasterThreadRc(); - Status rc; - int32_t rank = 0; // not used - - session_id_type env_session; - rc = GetSessionFromEnv(&env_session); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "UT test TestRandomDataCache1"; - // Start with an empty execution tree - auto myTree = std::make_shared(); - - // Create a schema using the C api's - std::unique_ptr test_schema = std::make_unique(); - - // 2 columns. 
First column is an "image" 640,480,3 - TensorShape c1Shape({640, 480, 3}); - ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, - rank, // not used - &c1Shape); - - // Column 2 will just be a scalar label number - TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor - ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); - - test_schema->AddColumn(c1); - test_schema->AddColumn(c2); - - // RandomDataOp - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - std::shared_ptr myRandomDataOp = - std::make_shared(4, op_connector_size, 50, std::move(test_schema)); - - rc = myTree->AssociateNode(myRandomDataOp); - ASSERT_TRUE(rc.IsOk()); - - // CacheOp - // size of 0, spilling is true - CacheClient::Builder builder; - builder.SetSessionId(env_session).SetCacheMemSz(0).SetSpill(true); - std::shared_ptr myClient; - rc = builder.Build(&myClient); - ASSERT_TRUE(rc.IsOk()); - - int64_t num_samples = 0; - int64_t start_index = 0; - auto seq_sampler = std::make_shared(start_index, num_samples); - std::shared_ptr myCacheOp = - std::make_shared(5, op_connector_size, myClient, std::move(seq_sampler)); - ASSERT_NE(myCacheOp, nullptr); - rc = myTree->AssociateNode(myCacheOp); - ASSERT_TRUE(rc.IsOk()); - - // RepeatOp - uint32_t num_repeats = 4; - std::shared_ptr myRepeatOp = std::make_shared(num_repeats); - rc = myTree->AssociateNode(myRepeatOp); - ASSERT_TRUE(rc.IsOk()); - - // Assign tree relations and root - myCacheOp->SetTotalRepeats(num_repeats); - myCacheOp->SetNumRepeatsPerEpoch(num_repeats); - rc = myRepeatOp->AddChild(myCacheOp); - ASSERT_TRUE(rc.IsOk()); - // Always set to 1 under a CacheOp because we read from it only once. The CacheOp is the one that repeats. 
- myRandomDataOp->SetTotalRepeats(1); - myRandomDataOp->SetNumRepeatsPerEpoch(1); - rc = myCacheOp->AddChild(myRandomDataOp); - ASSERT_TRUE(rc.IsOk()); - rc = myTree->AssignRoot(myRepeatOp); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - // quick check to see what tree looks like - std::ostringstream ss; - ss << *myTree; // some funny const error if I try to write directly to ms log stream - MS_LOG(INFO) << "Here's the tree:\n" << ss.str(); - - std::cout << *myClient << std::endl; - - rc = myTree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - ASSERT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - // Don't display these rows, just count them - MS_LOG(INFO) << "Row fetched #: " << rowCount; - rc = dI.FetchNextTensorRow(&tensorList); - ASSERT_TRUE(rc.IsOk()); - rowCount++; - } - ASSERT_EQ(rowCount, 200); - rc = myClient->DestroyCache(); - ASSERT_TRUE(rc.IsOk()); -} - -//// Simple test with a repeated cache op over random data producer. -//// This one will exceed memory and require a spill. -//// -//// RepeatOp -//// | -//// CacheOp -//// | -//// RandomDataOp -//// -TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) { - // Clear the rc of the master thread if any - (void)TaskManager::GetMasterThreadRc(); - Status rc; - int32_t rank = 0; // not used - MS_LOG(INFO) << "UT test TestRandomDataCacheSpill"; - - session_id_type env_session; - rc = GetSessionFromEnv(&env_session); - ASSERT_TRUE(rc.IsOk()); - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - // Create a schema using the C api's - std::unique_ptr test_schema = std::make_unique(); - - // 2 columns. 
First column is an "image" 640,480,3 - TensorShape c1Shape({640, 480, 3}); - ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, - rank, // not used - &c1Shape); - - // Column 2 will just be a scalar label number - TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor - ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); - - test_schema->AddColumn(c1); - test_schema->AddColumn(c2); - - // RandomDataOp - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - std::shared_ptr myRandomDataOp = - std::make_shared(4, op_connector_size, 10, std::move(test_schema)); - rc = myTree->AssociateNode(myRandomDataOp); - ASSERT_TRUE(rc.IsOk()); - - // CacheOp - int64_t num_samples = 0; - int64_t start_index = 0; - auto seq_sampler = std::make_shared(start_index, num_samples); - CacheClient::Builder builder; - builder.SetSessionId(env_session).SetCacheMemSz(4).SetSpill(true); - std::shared_ptr myClient; - rc = builder.Build(&myClient); - ASSERT_TRUE(rc.IsOk()); - std::shared_ptr myCacheOp = - std::make_shared(4, op_connector_size, myClient, std::move(seq_sampler)); - ASSERT_NE(myCacheOp, nullptr); - rc = myTree->AssociateNode(myCacheOp); - ASSERT_TRUE(rc.IsOk()); - - // RepeatOp - uint32_t num_repeats = 4; - std::shared_ptr myRepeatOp = std::make_shared(num_repeats); - rc = myTree->AssociateNode(myRepeatOp); - ASSERT_TRUE(rc.IsOk()); - - // Assign tree relations and root - myCacheOp->SetTotalRepeats(num_repeats); - myCacheOp->SetNumRepeatsPerEpoch(num_repeats); - rc = myRepeatOp->AddChild(myCacheOp); - ASSERT_TRUE(rc.IsOk()); - // Always set to 1 under a CacheOp because we read from it only once. The CacheOp is the one that repeats. 
- myRandomDataOp->SetTotalRepeats(1); - myRandomDataOp->SetNumRepeatsPerEpoch(1); - rc = myCacheOp->AddChild(myRandomDataOp); - ASSERT_TRUE(rc.IsOk()); - rc = myTree->AssignRoot(myRepeatOp); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - std::cout << *myClient << std::endl; - - rc = myTree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - ASSERT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - // Don't display these rows, just count them - MS_LOG(INFO) << "Row fetched #: " << rowCount; - rc = dI.FetchNextTensorRow(&tensorList); - ASSERT_TRUE(rc.IsOk()); - rowCount++; - } - ASSERT_EQ(rowCount, 40); - rc = myClient->DestroyCache(); - ASSERT_TRUE(rc.IsOk()); -} - TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) { // Clear the rc of the master thread if any (void)TaskManager::GetMasterThreadRc(); diff --git a/tests/ut/cpp/dataset/manifest_op_test.cc b/tests/ut/cpp/dataset/manifest_op_test.cc deleted file mode 100644 index 8a92ee6ce12..00000000000 --- a/tests/ut/cpp/dataset/manifest_op_test.cc +++ /dev/null @@ -1,210 +0,0 @@ -/** - * Copyright 2019-2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include - -#include "common/common.h" -#include "utils/ms_utils.h" -#include "minddata/dataset/core/client.h" -#include "minddata/dataset/core/global_context.h" -#include "minddata/dataset/engine/datasetops/source/manifest_op.h" -#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" -#include "minddata/dataset/util/status.h" -#include "gtest/gtest.h" -#include "utils/log_adapter.h" -#include "securec.h" - -namespace common = mindspore::common; - -using namespace mindspore::dataset; -using mindspore::LogStream; -using mindspore::ExceptionType::NoExceptionType; -using mindspore::MsLogLevel::ERROR; - -std::shared_ptr Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file, - std::string usage = "train", std::shared_ptr sampler = nullptr, - std::map map = {}, bool decode = false) { - if (sampler == nullptr) { - const int64_t num_samples = 0; - const int64_t start_index = 0; - sampler = std::make_shared(start_index, num_samples); - } - auto schema = std::make_unique(); - schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); - schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)); - return std::make_shared(num_works, file, conns, decode, map, std::move(schema), std::move(sampler), - usage); -} - -class MindDataTestManifest : public UT::DatasetOpTesting { - protected: -}; - -TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) { - std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; - auto op1 = Manifest(16, 2, 32, file); - auto op2 = Repeat(2); - op1->SetTotalRepeats(2); - op1->SetNumRepeatsPerEpoch(2); - auto tree = Build({op1, op2}); - tree->Prepare(); - uint32_t res[] = {0, 1, 0, 1}; - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error 
detected during tree launch: " << rc.ToString() << "."; - EXPECT_TRUE(false); - } else { - DatasetIterator di(tree); - TensorMap tensor_map; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - EXPECT_TRUE(res[i] == label); - MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; - i++; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - } - EXPECT_TRUE(i == 4); - } -} - -TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) { - std::vector indices({1}); - int64_t num_samples = 0; - std::shared_ptr sampler = std::make_shared(indices, num_samples); - std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; - // Expect 6 samples for label 0 and 1 - auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - EXPECT_TRUE(false); - } else { - DatasetIterator di(tree); - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - i++; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_EQ(label, 1); - } - EXPECT_TRUE(i == 1); - } -} - -TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) { - std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; - std::map map; - map["cat"] = 111; // forward slash is not good, but we need to add this somewhere, also in windows, its a '\' - map["dog"] = 222; // forward slash is not good, but we need to add this somewhere, also in windows, its a '\' - map["wrong folder name"] = 1234; // this is skipped - auto tree = Build({Manifest(16, 2, 32, file, "train", nullptr, map)}); - uint64_t res[2] = 
{111, 222}; - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - EXPECT_TRUE(false); - } else { - DatasetIterator di(tree); - TensorMap tensor_map; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - EXPECT_TRUE(label == res[i]); - MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; - i++; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - } - EXPECT_TRUE(i == 2); - } -} - -TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) { - std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; - int64_t num_samples = 1; - int64_t start_index = 0; - auto seq_sampler = std::make_shared(start_index, num_samples); - auto op1 = Manifest(16, 2, 32, file, "train", std::move(seq_sampler), {}); - auto op2 = Repeat(4); - op1->SetTotalRepeats(4); - op1->SetNumRepeatsPerEpoch(4); - auto tree = Build({op1, op2}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - EXPECT_TRUE(false); - } else { - DatasetIterator di(tree); - TensorMap tensor_map; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - EXPECT_TRUE(0 == label); - MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; - i++; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - } - EXPECT_TRUE(i == 4); - } -} - -TEST_F(MindDataTestManifest, MindDataTestManifestEval) { - std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; - int64_t num_samples = 1; - int64_t start_index = 0; - auto seq_sampler = 
std::make_shared(start_index, num_samples); - auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})}); - tree->Prepare(); - Status rc = tree->Launch(); - if (rc.IsError()) { - MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; - EXPECT_TRUE(false); - } else { - DatasetIterator di(tree); - TensorMap tensor_map; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - EXPECT_TRUE(0 == label); - MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; - i++; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - } - EXPECT_TRUE(i == 1); - } -} diff --git a/tests/ut/cpp/dataset/map_op_test.cc b/tests/ut/cpp/dataset/map_op_test.cc deleted file mode 100644 index 21d82d84a28..00000000000 --- a/tests/ut/cpp/dataset/map_op_test.cc +++ /dev/null @@ -1,752 +0,0 @@ -/** - * Copyright 2019-2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include - -#include "common/common.h" -#include "minddata/dataset/core/client.h" -#include "minddata/dataset/core/tensor.h" -#include "minddata/dataset/engine/datasetops/source/image_folder_op.h" -#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" -#include "minddata/dataset/engine/jagged_connector.h" -#include "minddata/dataset/kernels/image/decode_op.h" -#include "minddata/dataset/kernels/image/resize_op.h" -#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" -#include "minddata/dataset/kernels/tensor_op.h" -#include "utils/log_adapter.h" - -using namespace mindspore::dataset; -using mindspore::LogStream; -using mindspore::MsLogLevel::INFO; - -namespace mindspore { -namespace dataset { -namespace test { -class NoOp : public TensorOp { - public: - NoOp(){}; - - ~NoOp(){}; - - Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override { - *output = std::move(input); - return Status::OK(); - }; - - void Print(std::ostream &out) const override { out << "NoOp"; }; - - std::string Name() const override { return kNoOp; } -}; - -class ThreeToOneOp : public TensorOp { - public: - ThreeToOneOp(){}; - - ~ThreeToOneOp(){}; - - uint32_t NumInput() override { return 3; } - // Compute function that holds the actual implementation of the operation. - Status Compute(const TensorRow &input, TensorRow *output) override { - output->push_back(input[0]); - return Status::OK(); - }; - - void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }; - - std::string Name() const override { return "ThreeToOneOp"; } -}; - -class OneToThreeOp : public TensorOp { - public: - OneToThreeOp(){}; - - ~OneToThreeOp(){}; - - uint32_t NumOutput() override { return 3; } - - // Compute function that holds the actual implementation of the operation. - // Simply pushing the same shared pointer of the first element of input vector three times. 
- Status Compute(const TensorRow &input, TensorRow *output) override { - output->push_back(input[0]); - output->push_back(input[0]); - output->push_back(input[0]); - return Status::OK(); - }; - - void Print(std::ostream &out) const override { out << "OneToThreeOp"; }; - - std::string Name() const override { return "OneToThreeOp"; }; -}; -} // namespace test -} // namespace dataset -} // namespace mindspore - -class MindDataTestMapOp : public UT::DatasetOpTesting { - public: - void SetUp() override { - DatasetOpTesting::SetUp(); - dataset_path_ = datasets_root_path_ + "" + "/testDataset2/testDataset2.data"; - schema_path_ = datasets_root_path_ + "" + "/testDataset2/datasetSchema.json"; - - GlobalInit(); - - // Start with an empty execution tree - my_tree_ = std::make_shared(); - } - - std::shared_ptr CreateTFReaderOp() { - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - - std::unique_ptr schema = std::make_unique(); - std::vector columns_to_load = {"image", "label", "A", "B"}; - (void)schema->LoadSchemaFile(schema_path_, columns_to_load); - std::vector files = {dataset_path_}; - std::shared_ptr my_tfreader_op = std::make_shared( - 1, 2, 0, files, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false); - (void)my_tfreader_op->Init(); - return my_tfreader_op; - } - - std::shared_ptr my_tree_; - - private: - std::string dataset_path_; - std::string schema_path_; -}; - -std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, - bool shuf = false, std::shared_ptr sampler = nullptr, - std::map map = {}, bool decode = false) { - std::unique_ptr schema = std::make_unique(); - TensorShape scalar = TensorShape::CreateScalar(); - (void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); - (void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, 
&scalar)); - std::set ext = {".jpg", ".JPEG"}; - if (sampler == nullptr) { - int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data - int64_t start_index = 0; - sampler = std::make_shared(start_index, num_samples); - } - std::shared_ptr so = - std::make_shared(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler); - return so; -} - -// TestAsMap scenario: -// TFReaderOp reads a dataset that have column ordering |image|label|A|B|. -// A TensorOp that does nothing picks the "image" column and produces a column named "X". -// Thus, based on the new MapOp behaviour, the column ordering will be |X|label|A|B|. -// Verify that the "image" column is removed and "X" column is added. -TEST_F(MindDataTestMapOp, TestAsMap) { - Status rc; - MS_LOG(INFO) << "Doing TestAsMap."; - - // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows. - auto my_tfreader_op = this->CreateTFReaderOp(); - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto my_no_op = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(my_no_op); - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector in_columns = {"image"}; - std::vector out_columns = {"X"}; - std::shared_ptr my_map_op = - std::make_shared(in_columns, out_columns, std::move(my_func_list), 1, op_connector_size); - rc = my_tree_->AssociateNode(my_map_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_map_op->AddChild(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - - // Assign the tree root - rc = my_tree_->AssignRoot(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - // Now prepare the tree - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorMap tensor_map; - rc = 
di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(tensor_map.size(), 4); - EXPECT_EQ(tensor_map.find("image"), tensor_map.end()); - EXPECT_NE(tensor_map.find("label"), tensor_map.end()); - EXPECT_NE(tensor_map.find("X"), tensor_map.end()); - EXPECT_NE(tensor_map.find("A"), tensor_map.end()); - EXPECT_NE(tensor_map.find("B"), tensor_map.end()); -} - -// Test3to1 scenario: -// TFReaderOp reads a dataset that have column ordering |image|label|A|B|. -// A 3-to-1 TensorOp picks the columns [image, A, B] and produce a column named "X". -// Thus, based on the new MapOp behaviour, the column ordering will be |X|label|. -// Verify that the only columns "X" and "label" exist. -TEST_F(MindDataTestMapOp, Test3to1) { - Status rc; - MS_LOG(INFO) << "Doing Test3to1."; - - // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows. - auto my_tfreader_op = this->CreateTFReaderOp(); - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto my_op = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(my_op); - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector in_columns = {"image", "A", "B"}; - std::vector out_columns = {"X"}; - - std::shared_ptr my_map_op = - std::make_shared(in_columns, out_columns, std::move(my_func_list), 1, op_connector_size); - - rc = my_tree_->AssociateNode(my_map_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_map_op->AddChild(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->AssignRoot(my_map_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - while (!tensor_map.empty()) { - EXPECT_EQ(tensor_map.size(), 2); 
- EXPECT_EQ(tensor_map.find("image"), tensor_map.end()); - EXPECT_NE(tensor_map.find("label"), tensor_map.end()); - EXPECT_NE(tensor_map.find("X"), tensor_map.end()); - EXPECT_EQ(tensor_map.find("A"), tensor_map.end()); - EXPECT_EQ(tensor_map.find("B"), tensor_map.end()); - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - } -} - -// Test1to3 scenario: -// TFReaderOp reads a dataset that have column ordering |image|label|A|B|. -// A 1-to-3 TensorOp picks the columns [image] and produce a column named [X, Y, Z]. -// Thus, based on the new MapOp behaviour, the column ordering will be |X|Y|Z|label|A|B|. -// Verify that the only columns X, Y, Z are added (to the front) and followed by columns label, A, B.. -TEST_F(MindDataTestMapOp, Test1to3) { - Status rc; - MS_LOG(INFO) << "Doing Test1to3."; - - // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows. - auto my_tfreader_op = this->CreateTFReaderOp(); - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto my_op = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(my_op); - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector in_columns = {"image"}; - std::vector out_columns = {"X", "Y", "Z"}; - - std::shared_ptr my_map_op = - std::make_shared(in_columns, out_columns, std::move(my_func_list), 1, op_connector_size); - // ProjectOp - std::vector columns_to_project = {"X", "Y", "Z", "label", "A", "B"}; - std::shared_ptr my_project_op = std::make_shared(columns_to_project); - rc = my_tree_->AssociateNode(my_project_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree_->AssignRoot(my_project_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree_->AssociateNode(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_project_op->AddChild(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_map_op->AddChild(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - rc = 
my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(tensor_map.size(), 6); - EXPECT_EQ(tensor_map.find("image"), tensor_map.end()); - EXPECT_NE(tensor_map.find("label"), tensor_map.end()); - EXPECT_NE(tensor_map.find("A"), tensor_map.end()); - EXPECT_NE(tensor_map.find("B"), tensor_map.end()); - EXPECT_NE(tensor_map.find("X"), tensor_map.end()); - EXPECT_NE(tensor_map.find("Y"), tensor_map.end()); - EXPECT_NE(tensor_map.find("Z"), tensor_map.end()); - - // Getting the next row as vector (by position). - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - - // Based on the schema file, create the golden result to compare with. - std::vector golden_types({DataType::Type::DE_UINT8, DataType::Type::DE_UINT8, - DataType::Type::DE_UINT8, DataType::Type::DE_INT64, - DataType::Type::DE_FLOAT32, DataType::Type::DE_INT64}); - - std::vector golden_ranks({3, 3, 3, 1, 4, 1}); - - std::vector golden_shapes({TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), - TensorShape({7}), TensorShape({1, 13, 14, 12}), TensorShape({9})}); - - while (!tensor_list.empty()) { - for (uint32_t i = 0; i < tensor_list.size(); i++) { - EXPECT_EQ(tensor_list[i]->type(), golden_types[i]); - EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]); - EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]); - EXPECT_NE(tensor_list[i]->GetBuffer(), nullptr); - } - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - } -} - -// TestMultiTensorOp scenario: -// TFReaderOp reads a dataset that have column ordering |image|label|A|B|. -// A series of 3-to-1 and 1-to-3 TensorOps are applied to [image, A, B] and -// produce final output columns [X, Y, Z]. 
-// Based on the new MapOp behaviour, the column ordering will be |X|Y|Z|label|. -TEST_F(MindDataTestMapOp, TestMultiTensorOp) { - Status rc; - MS_LOG(INFO) << "Doing TestMultiTensorOp."; - - // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows. - auto my_tfreader_op = this->CreateTFReaderOp(); - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto my_op1 = std::make_shared(); - auto my_op2 = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(my_op1); - my_func_list.push_back(my_op2); - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector in_columns = {"image", "A", "B"}; - std::vector out_columns = {"X", "Y", "Z"}; - - std::shared_ptr my_map_op = - std::make_shared(in_columns, out_columns, std::move(my_func_list), 1, op_connector_size); - - rc = my_tree_->AssociateNode(my_map_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_map_op->AddChild(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->AssignRoot(my_map_op); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorMap tensor_map; - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - while (!tensor_map.empty()) { - EXPECT_EQ(tensor_map.size(), 4); - EXPECT_EQ(tensor_map.find("image"), tensor_map.end()); - EXPECT_EQ(tensor_map.find("A"), tensor_map.end()); - EXPECT_EQ(tensor_map.find("B"), tensor_map.end()); - EXPECT_NE(tensor_map.find("label"), tensor_map.end()); - EXPECT_NE(tensor_map.find("X"), tensor_map.end()); - EXPECT_NE(tensor_map.find("Y"), tensor_map.end()); - EXPECT_NE(tensor_map.find("Z"), tensor_map.end()); - - // XYZ are Tensor shared_ptr to image, so it should have the same shape as image column. 
- EXPECT_EQ(tensor_map["X"]->shape(), TensorShape({3, 4, 2})); - EXPECT_EQ(tensor_map["Y"]->shape(), TensorShape({3, 4, 2})); - EXPECT_EQ(tensor_map["Z"]->shape(), TensorShape({3, 4, 2})); - rc = di.GetNextAsMap(&tensor_map); - EXPECT_TRUE(rc.IsOk()); - } -} - -TEST_F(MindDataTestMapOp, TestTFReaderRepeatMap) { - Status rc; - MS_LOG(INFO) << "Doing TestTFReaderRepeatMap."; - uint32_t num_repeats = 3; - - // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total - // of 10 rows. - auto my_tfreader_op = this->CreateTFReaderOp(); - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto my_no_op = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(my_no_op); - - std::shared_ptr my_repeat_op = std::make_shared(num_repeats); - rc = my_tree_->AssociateNode(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector in_columns = {"label"}; - std::vector out_columns = {}; - - std::shared_ptr my_map_op = - std::make_shared(in_columns, out_columns, std::move(my_func_list), 5, op_connector_size); - - rc = my_tree_->AssociateNode(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_map_op->AddChild(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - my_tfreader_op->SetTotalRepeats(num_repeats); - my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_repeat_op->AddChild(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_tree_->AssignRoot(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(tensor_list.size(), 4); - uint32_t row_count = 0; - while (!tensor_list.empty()) { - row_count++; - 
MS_LOG(INFO) << "row_count: " << row_count << "."; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - } - ASSERT_EQ(row_count, 10 * num_repeats); -} - -TEST_F(MindDataTestMapOp, TestTFReaderMapRepeat) { - Status rc; - MS_LOG(INFO) << "Doing TestTFReaderMapRepeat."; - uint32_t num_repeats = 3; - - // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total - // of 10 rows. - auto my_tfreader_op = this->CreateTFReaderOp(); - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto my_no_op = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(my_no_op); - - std::shared_ptr my_repeat_op = std::make_shared(num_repeats); - rc = my_tree_->AssociateNode(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector input_columns = {"label"}; - std::vector output_columns = {}; - std::shared_ptr my_map_op = - std::make_shared(input_columns, output_columns, std::move(my_func_list), 50, op_connector_size); - - rc = my_tree_->AssociateNode(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - my_map_op->SetTotalRepeats(num_repeats); - my_map_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_repeat_op->AddChild(my_map_op); - EXPECT_TRUE(rc.IsOk()); - - my_tfreader_op->SetTotalRepeats(num_repeats); - my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_map_op->AddChild(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_tree_->AssignRoot(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(tensor_list.size(), 4); - uint32_t row_count = 0; - while (!tensor_list.empty()) { - 
row_count++; - MS_LOG(INFO) << "row_count: " << row_count << "."; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - } - ASSERT_EQ(row_count, 10 * num_repeats); -} - -TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) { - Status rc; - MS_LOG(INFO) << "Doing TFReader_Decode_Repeat_Resize."; - uint32_t num_repeats = 2; - - std::string dataset_path = datasets_root_path_ + "/" + "test_tf_file_3_images/train-0000-of-0001.data"; - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::unique_ptr schema = std::make_unique(); - std::vector columns_to_load = {"image", "label"}; - std::vector files = {dataset_path}; - std::shared_ptr my_tfreader_op = std::make_shared( - 1, 2, 0, files, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false); - (void)my_tfreader_op->Init(); - - rc = my_tree_->AssociateNode(my_tfreader_op); - EXPECT_TRUE(rc.IsOk()); - auto decode_op = std::make_shared(); - std::vector> my_func_list; - my_func_list.push_back(decode_op); - - std::shared_ptr my_repeat_op = std::make_shared(num_repeats); - rc = my_tree_->AssociateNode(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - std::vector input_columns = {"image"}; - std::vector output_columns = {}; - std::shared_ptr my_map_decode_op = - std::make_shared(input_columns, output_columns, std::move(my_func_list), 4, op_connector_size); - rc = my_tree_->AssociateNode(my_map_decode_op); - EXPECT_TRUE(rc.IsOk()); - - auto resize_op = std::make_shared(300, 300); - std::vector> my_func_list2; - my_func_list2.push_back(resize_op); - std::shared_ptr my_map_resize_op = - std::make_shared(input_columns, output_columns, std::move(my_func_list2), 5, op_connector_size); - rc = my_tree_->AssociateNode(my_map_resize_op); - EXPECT_TRUE(rc.IsOk()); - - my_tfreader_op->SetTotalRepeats(num_repeats); - my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_map_decode_op->AddChild(my_tfreader_op); - 
EXPECT_TRUE(rc.IsOk()); - - my_map_decode_op->SetTotalRepeats(num_repeats); - my_map_decode_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_repeat_op->AddChild(my_map_decode_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_map_resize_op->AddChild(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_tree_->AssignRoot(my_map_resize_op); - EXPECT_TRUE(rc.IsOk()); - - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - EXPECT_EQ(tensor_list.size(), 2); - uint32_t row_count = 0; - while (!tensor_list.empty()) { - row_count++; - rc = di.FetchNextTensorRow(&tensor_list); - EXPECT_TRUE(rc.IsOk()); - } - - ASSERT_EQ(row_count, 6); -} - -TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { - Status rc; - MS_LOG(INFO) << "Doing ImageFolder_Decode_Repeat_Resize."; - - std::string folder_path = datasets_root_path_ + "/testPK/data"; - - uint32_t num_repeats = 2; - std::shared_ptr repeat_op = std::make_shared(num_repeats); - EXPECT_TRUE(rc.IsOk()); - - auto decode_op = std::make_shared(); - std::vector> func_list; - func_list.push_back(decode_op); - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - int32_t num_parallel_workers = config_manager->num_parallel_workers(); - std::vector input_columns = {"image"}; - std::vector output_columns = {}; - std::shared_ptr map_decode_map = - std::make_shared(input_columns, output_columns, func_list, 4, op_connector_size); - - auto resize_op = std::make_shared(300, 300); - std::vector> func_list2; - func_list2.push_back(resize_op); - std::shared_ptr map_resize_op = - std::make_shared(input_columns, output_columns, func_list2, 5, op_connector_size); - - auto image_folder_op = ImageFolder(num_parallel_workers, 2, 32, 
folder_path, false); - image_folder_op->SetTotalRepeats(num_repeats); - image_folder_op->SetNumRepeatsPerEpoch(num_repeats); - map_decode_map->SetTotalRepeats(num_repeats); - map_decode_map->SetNumRepeatsPerEpoch(num_repeats); - my_tree_ = Build({image_folder_op, map_decode_map, repeat_op, map_resize_op}); - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorMap tensor_map; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - int32_t img_class[] = {0, 1, 2, 3}; - std::string result; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; - EXPECT_TRUE(img_class[(i % 44) / 11] == label); - // Dump all the image into string, to be used as a comparison later. - result.append((char *)tensor_map["image"]->GetBuffer(), (int64_t)tensor_map["image"]->Size()); - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - i++; - } - EXPECT_TRUE(i == 88); - - // Part-2 : creating mapop with performance mode = false, to check if the result is the same - // as when performance mode = true. 
- repeat_op = std::make_shared(num_repeats); - EXPECT_TRUE(rc.IsOk()); - map_decode_map = std::make_shared(input_columns, output_columns, func_list, 14, op_connector_size); - - map_resize_op = std::make_shared(input_columns, output_columns, func_list2, 15, op_connector_size); - - image_folder_op = ImageFolder(16, 2, 32, folder_path, false); - image_folder_op->SetTotalRepeats(num_repeats); - image_folder_op->SetNumRepeatsPerEpoch(num_repeats); - map_decode_map->SetTotalRepeats(num_repeats); - map_decode_map->SetNumRepeatsPerEpoch(num_repeats); - auto my_tree_2 = Build({image_folder_op, map_decode_map, repeat_op, map_resize_op}); - - rc = my_tree_2->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_2->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di2(my_tree_2); - ASSERT_OK(di2.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - i = 0; - label = 0; - std::string result2; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; - EXPECT_TRUE(img_class[(i % 44) / 11] == label); - result2.append((char *)tensor_map["image"]->GetBuffer(), (int64_t)tensor_map["image"]->Size()); - ASSERT_OK(di2.GetNextAsMap(&tensor_map)); - i++; - } - EXPECT_TRUE(i == 88); - - EXPECT_EQ(result.size(), result2.size()); - EXPECT_EQ(result, result2); -} - -TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) { - Status rc; - MS_LOG(INFO) << "Doing ImageFolder_Decode_Repeat_Resize_NoInputColumns."; - - std::string folder_path = datasets_root_path_ + "/testPK/data"; - - uint32_t num_repeats = 2; - std::shared_ptr repeat_op = std::make_shared(num_repeats); - ; - - auto decode_op = std::make_shared(); - std::vector> func_list; - func_list.push_back(decode_op); - std::shared_ptr config_manager = GlobalContext::config_manager(); - auto op_connector_size = config_manager->op_connector_size(); - std::vector input_columns = 
{}; - std::vector output_columns = {}; - std::shared_ptr map_decode_map = - std::make_shared(input_columns, output_columns, std::move(func_list), 4, op_connector_size); - ; - - auto resize_op = std::make_shared(300, 300); - std::vector> func_list2; - func_list2.push_back(resize_op); - std::shared_ptr map_resize_op = - std::make_shared(input_columns, output_columns, std::move(func_list2), 5, op_connector_size); - ; - - auto image_folder_op = ImageFolder(16, 2, 32, folder_path, false); - image_folder_op->SetTotalRepeats(num_repeats); - image_folder_op->SetNumRepeatsPerEpoch(num_repeats); - map_decode_map->SetTotalRepeats(num_repeats); - map_decode_map->SetNumRepeatsPerEpoch(num_repeats); - my_tree_ = Build({image_folder_op, map_decode_map, repeat_op, map_resize_op}); - rc = my_tree_->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = my_tree_->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree_); - TensorMap tensor_map; - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - EXPECT_TRUE(rc.IsOk()); - uint64_t i = 0; - int32_t label = 0; - int32_t img_class[] = {0, 1, 2, 3}; - std::string result; - while (tensor_map.size() != 0) { - tensor_map["label"]->GetItemAt(&label, {}); - EXPECT_TRUE(img_class[(i % 44) / 11] == label); - ASSERT_OK(di.GetNextAsMap(&tensor_map)); - i++; - } - EXPECT_TRUE(i == 88); -} diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index b9093fda65f..2757a46c19a 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -62,131 +62,6 @@ std::shared_ptr CreateMindRecord(int32_t mind_record_workers, bool return std::move(op); } -TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) { - // single MindRecord op and nothing else - // - // MindRecordOp - - MS_LOG(INFO) << "UT test TestMindRecordBasic"; - - Status rc; - - // Start with an empty execution tree - auto my_tree = std::make_shared(); - 
- // Test info: - // Dataset from testDataset1 has 10 rows, 2 columns. - // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row - // only. 2 workers. - // Test a column selection instead of all columns as well. - - std::vector column_list; - std::string label_col_name("file_name"); - column_list.push_back(label_col_name); - label_col_name = "label"; - column_list.push_back(label_col_name); - - std::shared_ptr my_mindrecord_op = CreateMindRecord( - 4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, column_list, {}); - - MS_LOG(DEBUG) << (*my_mindrecord_op); - - my_tree->AssociateNode(my_mindrecord_op); - - // Set children/root layout. - my_tree->AssignRoot(my_mindrecord_op); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - my_tree->Prepare(); - my_tree->Launch(); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl; - MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()); - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } -} - -TEST_F(MindDataTestMindRecordOp, TestMindRecordSample) { - // single MindRecord op and nothing else - // - // MindRecordOp - - MS_LOG(INFO) << "UT test TestMindRecordSample"; - - Status rc; - - // Start with an empty execution tree - auto my_tree = std::make_shared(); - - // Test info: - // Dataset from testDataset1 has 10 rows, 2 columns. - // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row - // only. 2 workers. 
- // Test a column selection instead of all columns as well. - - std::vector column_list; - std::string label_col_name("file_name"); - column_list.push_back(label_col_name); - label_col_name = "label"; - column_list.push_back(label_col_name); - - std::vector> operators; - operators.push_back(std::make_shared(4)); - - std::shared_ptr my_mindrecord_op = - CreateMindRecord(4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, - column_list, operators); - MS_LOG(DEBUG) << (*my_mindrecord_op); - - my_tree->AssociateNode(my_mindrecord_op); - - // Set children/root layout. - my_tree->AssignRoot(my_mindrecord_op); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - my_tree->Prepare(); - my_tree->Launch(); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl; - MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()); - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } -} - TEST_F(MindDataTestMindRecordOp, TestMindRecordShuffle) { // single MindRecord op and nothing else // @@ -319,148 +194,3 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordCategory) { row_count++; } } - -TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { - // single MindRecord op and nothing else - // - // MindRecordOp - - MS_LOG(INFO) << "UT test TestMindRecordRepeat"; - - Status rc; - - // Start with an empty execution tree - auto my_tree = std::make_shared(); - - // Test info: - // Dataset from testDataset1 has 10 rows, 2 columns. 
- // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row - // only. 2 workers. - // Test a column selection instead of all columns as well. - - std::vector column_list; - std::string label_col_name("file_name"); - column_list.push_back(label_col_name); - label_col_name = "label"; - column_list.push_back(label_col_name); - std::shared_ptr my_mindrecord_op = CreateMindRecord( - 4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, column_list, {}); - - MS_LOG(DEBUG) << (*my_mindrecord_op); - - rc = my_tree->AssociateNode(my_mindrecord_op); - EXPECT_TRUE(rc.IsOk()); - - uint32_t num_repeats = 2; - std::shared_ptr my_repeat_op = std::make_shared(num_repeats); - rc = my_tree->AssociateNode(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - my_mindrecord_op->SetTotalRepeats(num_repeats); - my_mindrecord_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_repeat_op->AddChild(my_mindrecord_op); - EXPECT_TRUE(rc.IsOk()); - - // Set children/root layout. 
- rc = my_tree->AssignRoot(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - my_tree->Prepare(); - my_tree->Launch(); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl; - MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()); - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } -} - -TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { - // single MindRecord op and nothing else - // - // MindRecordOp - - MS_LOG(INFO) << "UT test TestMindRecordBlockReaderRepeat"; - - Status rc; - - // Start with an empty execution tree - auto my_tree = std::make_shared(); - - // Test info: - // Dataset from testDataset1 has 10 rows, 2 columns. - // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row - // only. 2 workers. - // Test a column selection instead of all columns as well. 
- - std::vector column_list; - std::string label_col_name("file_name"); - column_list.push_back(label_col_name); - label_col_name = "label"; - column_list.push_back(label_col_name); - - std::shared_ptr my_mindrecord_op = CreateMindRecord( - 4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, column_list, {}); - - MS_LOG(DEBUG) << (*my_mindrecord_op); - - rc = my_tree->AssociateNode(my_mindrecord_op); - EXPECT_TRUE(rc.IsOk()); - - uint32_t num_repeats = 2; - std::shared_ptr my_repeat_op = std::make_shared(num_repeats); - rc = my_tree->AssociateNode(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - my_mindrecord_op->SetTotalRepeats(num_repeats); - my_mindrecord_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_repeat_op->AddChild(my_mindrecord_op); - EXPECT_TRUE(rc.IsOk()); - - // Set children/root layout. - rc = my_tree->AssignRoot(my_repeat_op); - EXPECT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - my_tree->Prepare(); - my_tree->Launch(); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl; - MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()); - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } -} diff --git a/tests/ut/cpp/dataset/random_data_op_test.cc b/tests/ut/cpp/dataset/random_data_op_test.cc deleted file mode 100644 index 258cd2ca46f..00000000000 --- a/tests/ut/cpp/dataset/random_data_op_test.cc +++ /dev/null @@ -1,417 +0,0 @@ -/** - * Copyright 2019-2021 Huawei Technologies Co., Ltd - * - * 
Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "minddata/dataset/core/client.h" -#include "common/common.h" -#include "gtest/gtest.h" -#include -#include -#include -#include "minddata/dataset/core/tensor_shape.h" -#include "minddata/dataset/engine/datasetops/source/random_data_op.h" -#include "minddata/dataset/engine/data_schema.h" -#include "minddata/dataset/util/random.h" - -using namespace mindspore::dataset; -using mindspore::LogStream; -using mindspore::ExceptionType::NoExceptionType; -using mindspore::MsLogLevel::INFO; - -class MindDataTestRandomDataOp : public UT::DatasetOpTesting {}; - -// Test info: -// - Simple test with a user-provided schema generated purely from DataSchema C API -// - has an interaction loop -// -// Tree: single node tree with RandomDataOp -// -// RandomDataOp -// -TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) { - Status rc; - int32_t rank = 0; // not used - MS_LOG(INFO) << "UT test RandomDataOpBasic1"; - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - // Create a schema using the C api's - std::unique_ptr testSchema = std::make_unique(); - - // RandomDataOp can randomly fill in unknown dimension lengths of a shape. - // Most other ops cannot do that as they are limited by the physical data itself. We're - // more flexible with random data since it is just making stuff up on the fly. 
- TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3}); - ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, - rank, // not used - &c1Shape); - - // Column 2 will just be a scalar label number - TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor - ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); - - testSchema->AddColumn(c1); - testSchema->AddColumn(c2); - std::shared_ptr cfg = GlobalContext::config_manager(); - auto op_connector_size = cfg->op_connector_size(); - - std::shared_ptr myRandomDataOp = - std::make_shared(1, op_connector_size, 25, std::move(testSchema)); - - rc = myTree->AssociateNode(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - rc = myTree->AssignRoot(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - std::ostringstream ss; - ss << *myRandomDataOp; - MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str(); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = myTree->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - // Don't display these rows...too big to show - MS_LOG(INFO) << "Row fetched #: " << rowCount; - - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - rowCount++; - } - ASSERT_EQ(rowCount, 25); -} - -// Test info: -// - Simple test with a randomly generated schema -// - no iteration loop on this one, just create the op -// -// Tree: single node tree with RandomDataOp -// -// RandomDataOp -// -TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) { - Status rc; - MS_LOG(INFO) << "UT test RandomDataOpBasic2"; - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - std::shared_ptr cfg = 
GlobalContext::config_manager(); - auto op_connector_size = cfg->op_connector_size(); - - std::shared_ptr myRandomDataOp = std::make_shared(1, op_connector_size, 0, nullptr); - - rc = myTree->AssociateNode(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - rc = myTree->AssignRoot(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - std::ostringstream ss; - ss << *myRandomDataOp; - MS_LOG(INFO) << "RandomDataOp print: " << ss.str(); -} - -// Test info: -// - json file test with iteration -// -// Tree: single node tree with RandomDataOp -// -// RandomDataOp -// -TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) { - Status rc; - MS_LOG(INFO) << "UT test RandomDataOpBasic3"; - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - std::unique_ptr testSchema = std::make_unique(); - rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {}); - EXPECT_TRUE(rc.IsOk()); - std::shared_ptr cfg = GlobalContext::config_manager(); - auto op_connector_size = cfg->op_connector_size(); - - std::shared_ptr myRandomDataOp = - std::make_shared(1, op_connector_size, 10, std::move(testSchema)); - - rc = myTree->AssociateNode(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - rc = myTree->AssignRoot(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - std::ostringstream ss; - ss << *myRandomDataOp; - MS_LOG(INFO) << "RandomDataOp print: " << ss.str(); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = myTree->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - // Don't display these rows...too big to show - MS_LOG(INFO) << "Row fetched #: " << rowCount; - - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - rowCount++; - } - ASSERT_EQ(rowCount, 
10); -} - -// Test info: -// - json schema input it's a fairly simple one -// - has an interaction loop -// -// Tree: RepeatOp over RandomDataOp -// -// RepeatOp -// | -// RandomDataOp -// -TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) { - Status rc; - MS_LOG(INFO) << "UT test RandomDataOpBasic4"; - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - std::unique_ptr testSchema = std::make_unique(); - rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); - EXPECT_TRUE(rc.IsOk()); - std::shared_ptr cfg = GlobalContext::config_manager(); - auto op_connector_size = cfg->op_connector_size(); - - std::shared_ptr myRandomDataOp = - std::make_shared(1, op_connector_size, 10, std::move(testSchema)); - - rc = myTree->AssociateNode(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - uint32_t numRepeats = 2; - std::shared_ptr myRepeatOp = std::make_shared(numRepeats); - rc = myTree->AssociateNode(myRepeatOp); - EXPECT_TRUE(rc.IsOk()); - - myRandomDataOp->SetTotalRepeats(numRepeats); - myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats); - rc = myRepeatOp->AddChild(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - rc = myTree->AssignRoot(myRepeatOp); - EXPECT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = myTree->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - MS_LOG(INFO) << "Row display for row #: " << rowCount; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensorList.size(); i++) { - std::ostringstream ss; - ss << *tensorList[i] << std::endl; - MS_LOG(INFO) << "Tensor print: %s" << ss.str(); - } - - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - 
rowCount++; - } - ASSERT_EQ(rowCount, 20); -} - -// Test info: -// - json schema input it's a fairly simple one -// - has an interaction loop -// - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers -// -// Tree: RepeatOp over RandomDataOp -// -// RepeatOp -// | -// RandomDataOp -// -TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) { - Status rc; - MS_LOG(INFO) << "UT test RandomDataOpBasic5"; - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - std::unique_ptr testSchema = std::make_unique(); - rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); - EXPECT_TRUE(rc.IsOk()); - std::shared_ptr cfg = GlobalContext::config_manager(); - auto op_connector_size = cfg->op_connector_size(); - - std::shared_ptr myRandomDataOp = - std::make_shared(4, op_connector_size, 10, std::move(testSchema)); - - rc = myTree->AssociateNode(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - uint32_t numRepeats = 3; - std::shared_ptr myRepeatOp = std::make_shared(numRepeats); - rc = myTree->AssociateNode(myRepeatOp); - EXPECT_TRUE(rc.IsOk()); - - myRandomDataOp->SetTotalRepeats(numRepeats); - myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats); - rc = myRepeatOp->AddChild(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - rc = myTree->AssignRoot(myRepeatOp); - EXPECT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = myTree->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - MS_LOG(INFO) << "Row display for row #: " << rowCount; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensorList.size(); i++) { - std::ostringstream ss; - ss << *tensorList[i] << std::endl; 
- MS_LOG(INFO) << "Tensor print: ", ss.str(); - } - - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - rowCount++; - } - ASSERT_EQ(rowCount, 30); -} - -// Test info: -// - repeat shuffle random -// -// Tree: RepeatOp over RandomDataOp -// -// RepeatOp -// | -// ShuffleOp -// | -// RandomDataOp -// -TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { - Status rc; - MS_LOG(INFO) << "UT test RandomDataOpTree1"; - - // Start with an empty execution tree - auto myTree = std::make_shared(); - - std::unique_ptr testSchema = std::make_unique(); - rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); - EXPECT_TRUE(rc.IsOk()); - std::shared_ptr cfg = GlobalContext::config_manager(); - auto op_connector_size = cfg->op_connector_size(); - - std::shared_ptr myRandomDataOp = - std::make_shared(4, op_connector_size, 10, std::move(testSchema)); - - rc = myTree->AssociateNode(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - uint32_t shuffle_seed = GetSeed(); - std::shared_ptr myShuffleOp = std::make_shared(4, shuffle_seed, op_connector_size, true); - - rc = myTree->AssociateNode(myShuffleOp); - EXPECT_TRUE(rc.IsOk()); - - uint32_t numRepeats = 3; - std::shared_ptr myRepeatOp = std::make_shared(numRepeats); - rc = myTree->AssociateNode(myRepeatOp); - EXPECT_TRUE(rc.IsOk()); - - myShuffleOp->SetTotalRepeats(numRepeats); - myShuffleOp->SetNumRepeatsPerEpoch(numRepeats); - rc = myRepeatOp->AddChild(myShuffleOp); - EXPECT_TRUE(rc.IsOk()); - - myRandomDataOp->SetTotalRepeats(numRepeats); - myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats); - rc = myShuffleOp->AddChild(myRandomDataOp); - EXPECT_TRUE(rc.IsOk()); - - rc = myTree->AssignRoot(myRepeatOp); - EXPECT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration"; - rc = myTree->Prepare(); - EXPECT_TRUE(rc.IsOk()); - rc = myTree->Launch(); - EXPECT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator dI(myTree); - 
TensorRow tensorList; - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - int rowCount = 0; - while (!tensorList.empty()) { - MS_LOG(INFO) << "Row display for row #: " << rowCount; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensorList.size(); i++) { - std::ostringstream ss; - ss << *tensorList[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str(); - } - - rc = dI.FetchNextTensorRow(&tensorList); - EXPECT_TRUE(rc.IsOk()); - rowCount++; - } - ASSERT_EQ(rowCount, 30); -} diff --git a/tests/ut/cpp/dataset/tfReader_op_test.cc b/tests/ut/cpp/dataset/tfReader_op_test.cc index 9acb6940c11..9bc87d68e00 100644 --- a/tests/ut/cpp/dataset/tfReader_op_test.cc +++ b/tests/ut/cpp/dataset/tfReader_op_test.cc @@ -33,63 +33,6 @@ using mindspore::MsLogLevel::INFO; class MindDataTestTFReaderOp : public UT::DatasetOpTesting {}; -TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) { - // Start with an empty execution tree - auto my_tree = std::make_shared(); - Status rc; - std::string dataset_path; - dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; - - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - int32_t num_workers = 1; - int32_t worker_connector_size = config_manager->worker_connector_size(); - std::vector files = {dataset_path}; - std::vector columns_to_load = {}; - - std::unique_ptr schema = std::make_unique(); - schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); - std::shared_ptr my_tfreader_op = - std::make_shared(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, - columns_to_load, false, 1, 0, false); - rc = my_tfreader_op->Init(); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssociateNode(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->AssignRoot(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree 
and begin iteration."; - rc = my_tree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } - - ASSERT_EQ(row_count, 12); -} - TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeRowsPerBuffer) { // Start with an empty execution tree auto my_tree = std::make_shared(); @@ -318,201 +261,6 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) { ASSERT_EQ(row_count, 12); } -TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) { - // Start with an empty execution tree - auto my_tree = std::make_shared(); - Status rc; - std::string dataset_path; - dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; - - // TFReaderOp - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - int32_t num_workers = 1; - int32_t worker_connector_size = 16; - std::unique_ptr schema = std::make_unique(); - std::vector files = {dataset_path}; - std::vector columns_to_load = {}; - - schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); - std::shared_ptr my_tfreader_op = - std::make_shared(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, - columns_to_load, false, 1, 0, false); - rc = my_tfreader_op->Init(); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssociateNode(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - // 
RepeatOp - uint32_t num_repeats = 3; - std::shared_ptr my_repeat_op = std::make_shared(num_repeats); - rc = my_tree->AssociateNode(my_repeat_op); - ASSERT_TRUE(rc.IsOk()); - - // Set children/root layout. - my_tfreader_op->SetTotalRepeats(num_repeats); - my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats); - rc = my_repeat_op->AddChild(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssignRoot(my_repeat_op); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration."; - rc = my_tree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count << "."; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } - - ASSERT_EQ(row_count, 12 * 3); -} - -TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) { - // Start with an empty execution tree - auto my_tree = std::make_shared(); - Status rc; - std::string dataset_path; - dataset_path = datasets_root_path_ + "/testTFTestAllTypes"; - std::vector files = {dataset_path + "/test.data"}; - - std::unique_ptr schema = std::make_unique(); - std::vector columns_to_load; - columns_to_load.push_back("col_sint32"); - columns_to_load.push_back("col_binary"); - schema->LoadSchemaFile(dataset_path + "/datasetSchema.json", columns_to_load); - - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - 
int32_t worker_connector_size = config_manager->worker_connector_size(); - int32_t num_workers = 1; - - std::shared_ptr my_tfreader_op = - std::make_shared(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, - columns_to_load, false, 1, 0, false); - rc = my_tfreader_op->Init(); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssociateNode(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->AssignRoot(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration."; - rc = my_tree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - // Display the tensor by calling the printer on it - ASSERT_EQ(tensor_list.size(), columns_to_load.size()); - - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } - - ASSERT_EQ(row_count, 12); -} - -TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Row) { - // Start with an empty execution tree - auto my_tree = std::make_shared(); - Status rc; - std::string dataset_path; - dataset_path = datasets_root_path_ + "/testTFTestAllTypes"; - - std::string data_schema_filepath = dataset_path + "/datasetSchema1Row.json"; - - // TFReaderOp - std::unique_ptr schema = std::make_unique(); - schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - int32_t num_workers = 1; - int32_t 
worker_connector_size = config_manager->worker_connector_size(); - std::vector files = {dataset_path + "/test.data"}; - std::vector columns_to_load = {}; - - std::shared_ptr my_tfreader_op = - std::make_shared(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, - columns_to_load, false, 1, 0, false); - rc = my_tfreader_op->Init(); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssociateNode(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->AssignRoot(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration."; - rc = my_tree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count << "."; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } - - ASSERT_EQ(row_count, 1); -} - TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) { // Start with an empty execution tree auto my_tree = std::make_shared(); @@ -575,122 +323,6 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) { ASSERT_EQ(row_count, 5); } -TEST_F(MindDataTestTFReaderOp, TestTFReaderTake7Rows) { - // Start with an empty execution tree - auto my_tree = std::make_shared(); - Status rc; - std::string dataset_path; - dataset_path = datasets_root_path_ + "/testTFTestAllTypes"; - - std::string data_schema_filepath = dataset_path + "/datasetSchema7Rows.json"; - - // TFReaderOp - std::unique_ptr schema = 
std::make_unique(); - schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json", {}); - std::shared_ptr config_manager = GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - int32_t num_workers = 1; - int32_t worker_connector_size = config_manager->worker_connector_size(); - std::vector files = {dataset_path + "/test.data"}; - std::vector columns_to_load = {}; - - std::shared_ptr my_tfreader_op = - std::make_shared(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, - columns_to_load, false, 1, 0, false); - rc = my_tfreader_op->Init(); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssociateNode(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->AssignRoot(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration."; - rc = my_tree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - MS_LOG(INFO) << "Row display for row #: " << row_count << "."; - - // Display the tensor by calling the printer on it - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } - - ASSERT_EQ(row_count, 7); -} - -TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) { - // Start with an empty execution tree - auto my_tree = std::make_shared(); - Status rc; - std::string dataset_path; - dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data"; - std::shared_ptr config_manager = 
GlobalContext::config_manager(); - int32_t op_connector_size = config_manager->op_connector_size(); - int32_t num_workers = 1; - std::vector columns_to_load = {}; - std::vector files = {dataset_path}; - int32_t worker_connector_size = config_manager->worker_connector_size(); - std::unique_ptr schema = std::make_unique(); - std::shared_ptr my_tfreader_op = - std::make_shared(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size, - columns_to_load, false, 1, 0, false); - rc = my_tfreader_op->Init(); - ASSERT_TRUE(rc.IsOk()); - rc = my_tree->AssociateNode(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->AssignRoot(my_tfreader_op); - ASSERT_TRUE(rc.IsOk()); - - MS_LOG(INFO) << "Launching tree and begin iteration."; - rc = my_tree->Prepare(); - ASSERT_TRUE(rc.IsOk()); - - rc = my_tree->Launch(); - ASSERT_TRUE(rc.IsOk()); - - // Start the loop of reading tensors from our pipeline - DatasetIterator di(my_tree); - TensorRow tensor_list; - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - - int row_count = 0; - while (!tensor_list.empty()) { - // Display the tensor by calling the printer on it - ASSERT_EQ(tensor_list.size(), 9); - for (int i = 0; i < tensor_list.size(); i++) { - std::ostringstream ss; - ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; - MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; - } - - rc = di.FetchNextTensorRow(&tensor_list); - ASSERT_TRUE(rc.IsOk()); - row_count++; - } - - ASSERT_EQ(row_count, 12); -} TEST_F(MindDataTestTFReaderOp, TestTotalRowsBasic) { std::string tf_file = datasets_root_path_ + "/testTFTestAllTypes/test.data";