MD C++ UT: Remove old tests part 3

Merge pull request from hetshah/hs_remove_old_tests
This commit is contained in:
i-robot 2021-11-25 19:29:30 +00:00 committed by Gitee
commit 3520f1427d
12 changed files with 799 additions and 2489 deletions

View File

@ -46,260 +46,18 @@ class MindDataTestBatchOp : public UT::DatasetOpTesting {
protected:
};
// Feature: Test Batch op on TFReader
// Description: Batch all 12 rows into a single batch and verify tensor contents
// Expectation: One batch of shape {12, 1} matching the expected payload, then end of data
TEST_F(MindDataTestBatchOp, TestSimpleBatch) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
const std::shared_ptr<de::BatchOp> &op = Batch(12);
EXPECT_EQ(op->Name(), "BatchOp");
auto tree = Build({TFReader(schema_file), op});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
} else {
// Expected values of col_sint64: INT64_MIN, 1..10, INT64_MAX.
int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
de::DatasetIterator di(tree);
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
std::shared_ptr<de::Tensor> t;
rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)payload, &t);
EXPECT_TRUE(rc.IsOk());
// verify the actual data in Tensor is correct
EXPECT_EQ(*t == *tensor_map["col_sint64"], true);
// compare against a different column to verify the comparison can also report a mismatch
EXPECT_EQ(*t == *tensor_map["col_sint16"], false);
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
// an empty map signals end of data after the single batch
if (tensor_map.size() == 0) {
success = true;
}
}
EXPECT_EQ(success, true);
}
// Feature: Test Repeat then Batch with drop_remainder=true on TFReader
// Description: 12 rows repeated twice (24 rows), batched by 7 with the leftover dropped
// Expectation: Exactly 3 full batches of 7 rows, then end of data
TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Repeat(2);
auto op3 = Batch(7, true);
op1->SetTotalRepeats(2);
op1->SetNumRepeatsPerEpoch(2);
auto tree = Build({op1, op2, op3});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
} else {
// The 12-row dataset repeated twice: INT64_MIN, 1..10, INT64_MAX, twice over.
int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
de::DatasetIterator di(tree);
std::shared_ptr<de::Tensor> t1, t2, t3;
// t1, t2: first two batches (rows 0-6, 7-13); t3: third batch (rows 14-20),
// whose values coincide with payload[2..8] since the data repeats after row 11.
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)payload, &t1);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 7), &t2);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 2), &t3);
EXPECT_TRUE(rc.IsOk());
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t3 == *(tensor_map["col_sint64"]), true); // third call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
// the leftover 3 rows are dropped, so the fourth fetch must be end of data
if (tensor_map.size() == 0) {
success = true;
}
}
EXPECT_EQ(success, true);
}
// Feature: Test Repeat then Batch with drop_remainder=false on TFReader
// Description: 12 rows repeated twice (24 rows), batched by 7 keeping the remainder
// Expectation: Three full batches of 7 plus a final batch of 3 rows, then end of data
TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Repeat(2);
auto op3 = Batch(7, false);
op1->SetTotalRepeats(2);
op1->SetNumRepeatsPerEpoch(2);
auto tree = Build({op1, op2, op3});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
} else {
// The 12-row dataset repeated twice: INT64_MIN, 1..10, INT64_MAX, twice over.
int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
de::DatasetIterator di(tree);
std::shared_ptr<de::Tensor> t1, t2, t3, t4;
// t1-t3: full batches of 7; t4: the remainder batch of 3 rows (24 % 7),
// whose values match payload[9..11] = {9, 10, INT64_MAX}.
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)payload, &t1);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 7), &t2);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 2), &t3);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 9), &t4);
EXPECT_TRUE(rc.IsOk());
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t3 == *(tensor_map["col_sint64"]), true); // third call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t4 == *(tensor_map["col_sint64"]), true); // last call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
// after the 4 batches the iterator must report end of data
if (tensor_map.size() == 0) {
success = true;
}
}
EXPECT_EQ(success, true);
}
// Feature: Test Batch (drop_remainder=false) then Repeat on TFReader
// Description: 12 rows batched by 7 keeping the remainder, the batches repeated twice
// Expectation: The sequence {batch of 7, batch of 5} appears twice, then end of data
TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Batch(7, false);
auto op3 = Repeat(2);
op1->SetTotalRepeats(2);
op1->SetNumRepeatsPerEpoch(2);
op2->SetTotalRepeats(2);
op2->SetNumRepeatsPerEpoch(2);
auto tree = Build({op1, op2, op3});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
} else {
int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
de::DatasetIterator di(tree);
std::shared_ptr<de::Tensor> t1, t2;
// t1: full batch of 7 (rows 0-6); t2: remainder batch of 5 (rows 7-11).
// Because Repeat sits above Batch, the pair t1, t2 is produced twice.
rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)payload, &t1);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 7), &t2);
EXPECT_TRUE(rc.IsOk());
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // third call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // last call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
// after the 4 batches the iterator must report end of data
if (tensor_map.size() == 0) {
success = true;
}
}
EXPECT_EQ(success, true);
}
// Feature: Test Batch (drop_remainder=true) then Repeat on TFReader
// Description: 12 rows batched by 5 dropping the remainder, the batches repeated twice
// Expectation: The sequence {batch rows 0-4, batch rows 5-9} appears twice, then end of data
TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Batch(5, true);
auto op3 = Repeat(2);
op1->SetTotalRepeats(2);
op1->SetNumRepeatsPerEpoch(2);
op2->SetTotalRepeats(2);
op2->SetNumRepeatsPerEpoch(2);
auto tree = Build({op1, op2, op3});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
} else {
int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
de::DatasetIterator di(tree);
std::shared_ptr<de::Tensor> t1, t2;
// t1, t2: the two full batches of 5 (rows 0-4, 5-9); rows 10-11 are dropped.
// Because Repeat sits above Batch, the pair t1, t2 is produced twice.
rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)payload, &t1);
EXPECT_TRUE(rc.IsOk());
rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
(unsigned char *)(payload + 5), &t2);
EXPECT_TRUE(rc.IsOk());
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // third call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // last call to getNext()
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
// after the 4 batches the iterator must report end of data
if (tensor_map.size() == 0) {
success = true;
}
}
EXPECT_EQ(success, true);
}
TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
// This test has been disabled because PadInfo is not currently supported in the C++ API.
// Feature: Test Batch op with padding on TFReader
// Description: Create Batch operation with padding on a TFReader dataset
// Expectation: The data within the created object should match the expected data
TEST_F(MindDataTestBatchOp, DISABLED_TestSimpleBatchPadding) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
PadInfo m;
std::shared_ptr<Tensor> pad_value;
Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_value);
pad_value->SetItemAt<float>({}, -1);
m.insert({"col_1d", std::make_pair(TensorShape({4}), pad_value)});
/*
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
auto op_connector_size = config_manager->op_connector_size();
@ -309,6 +67,7 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
pybind11::function batch_size_func;
pybind11::function batch_map_func;
*/
int32_t batch_size = 12;
bool drop = false;
std::shared_ptr<BatchOp> op = Batch(batch_size, drop, m);

View File

@ -944,3 +944,94 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
EXPECT_EQ(iter2, nullptr);
}
// Feature: Test RandomData with Cache and Repeat
// Description: Iterate through dataset and count rows
// Expectation: There should be 200 rows in the dataset
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi1) {
  session_id_type env_session;
  Status rc = GetSessionFromEnv(&env_session);
  EXPECT_EQ(rc, Status::OK());

  // Cache with mem_sz=0 and spill=true.
  std::shared_ptr<DatasetCache> cache = CreateDatasetCache(env_session, 0, true);
  EXPECT_NE(cache, nullptr);

  // 50 random rows with an image column and a scalar label column.
  std::shared_ptr<SchemaObj> schema = Schema();
  ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {640, 480, 3}));
  ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {}));
  std::shared_ptr<Dataset> ds = RandomData(50, schema, {}, cache);
  EXPECT_NE(ds, nullptr);

  // Repeat the 50 cached rows 4 times -> 200 rows total.
  int32_t repeat_num = 4;
  ds = ds->Repeat(repeat_num);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Drain the pipeline, counting rows until an empty row signals end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 200);

  // Manually terminate the pipeline
  iter->Stop();
}
// Feature: Test RandomData with Cache and Repeat
// Description: Set mem_sz such that spill occurs, iterate through dataset and count rows
// Expectation: There should be 40 rows in the dataset
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataSpillCApi) {
  session_id_type env_session;
  Status rc = GetSessionFromEnv(&env_session);
  EXPECT_EQ(rc, Status::OK());

  // Create cache with mem_sz=4 and spill=true so the cache spills to disk.
  std::shared_ptr<DatasetCache> cache = CreateDatasetCache(env_session, 4, true);
  EXPECT_NE(cache, nullptr);

  // 10 random rows with an image column and a scalar label column.
  std::shared_ptr<SchemaObj> schema = Schema();
  ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {640, 480, 3}));
  ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {}));
  std::shared_ptr<Dataset> ds = RandomData(10, schema, {}, cache);
  EXPECT_NE(ds, nullptr);

  // Repeat the 10 cached rows 4 times -> 40 rows total.
  int32_t repeat_num = 4;
  ds = ds->Repeat(repeat_num);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Drain the pipeline, counting rows until an empty row signals end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    ++row_count;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 40);

  // Manually terminate the pipeline
  iter->Stop();
}

View File

@ -419,3 +419,38 @@ TEST_F(MindDataTestPipeline, TestManifestWithNullSamplerError) {
// Expect failure: invalid Manifest input, sampler cannot be nullptr
EXPECT_EQ(iter, nullptr);
}
// Feature: Test SubsetRandomSampler with Manifest
// Description: Use SubsetRandomSampler with 1 index given, iterate through dataset and count rows
// Expectation: There should be 1 row in the dataset
TEST_F(MindDataTestPipeline, TestManifestSubsetRandomSampler) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestSubsetRandomSampler.";
  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Sampler restricted to a single index, so only one sample is visited.
  std::vector<int64_t> subset_indices = {1};
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<SubsetRandomSampler>(subset_indices));
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Drain the pipeline, counting rows until an empty row signals end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t row_count = 0;
  while (!row.empty()) {
    ++row_count;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(row_count, 1);

  // Manually terminate the pipeline
  iter->Stop();
}

View File

@ -19,10 +19,125 @@
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
#include "minddata/dataset/include/dataset/datasets.h"
#include "minddata/dataset/include/dataset/vision.h"
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
using namespace mindspore::dataset;
using mindspore::dataset::Tensor;
namespace mindspore {
namespace dataset {
namespace test {
// Identity tensor op: forwards its input tensor to the output unchanged.
class NoOp : public TensorOp {
 public:
  NoOp() = default;
  ~NoOp() = default;

  Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override {
    // `input` is a const reference, so the previous std::move(input) was a
    // no-op that still copied the shared_ptr (performance-move-const-arg);
    // assign directly to make the copy explicit.
    *output = input;
    return Status::OK();
  }

  void Print(std::ostream &out) const override { out << "NoOp"; }

  std::string Name() const override { return kNoOp; }
};
// Tensor op that consumes three input columns and emits a single output column.
class ThreeToOneOp : public TensorOp {
 public:
  ThreeToOneOp() = default;
  ~ThreeToOneOp() = default;

  // Declares that this op expects three input tensors.
  uint32_t NumInput() override { return 3; }

  // Compute function that holds the actual implementation of the operation.
  // Emits only the first of the three input tensors.
  Status Compute(const TensorRow &input, TensorRow *output) override {
    output->push_back(input[0]);
    return Status::OK();
  }

  void Print(std::ostream &out) const override { out << "ThreeToOneOp"; }

  std::string Name() const override { return "ThreeToOneOp"; }
};
// Tensor op that consumes one input column and emits three output columns.
class OneToThreeOp : public TensorOp {
 public:
  OneToThreeOp() = default;
  ~OneToThreeOp() = default;

  // Declares that this op produces three output tensors.
  uint32_t NumOutput() override { return 3; }

  // Compute function that holds the actual implementation of the operation.
  // Simply pushes the same shared pointer of the first input element three times.
  Status Compute(const TensorRow &input, TensorRow *output) override {
    for (int k = 0; k < 3; ++k) {
      output->push_back(input[0]);
    }
    return Status::OK();
  }

  void Print(std::ostream &out) const override { out << "OneToThreeOp"; }

  std::string Name() const override { return "OneToThreeOp"; }
};
class NoTransform final : public TensorTransform {
public:
explicit NoTransform() {}
~NoTransform() = default;
protected:
std::shared_ptr<TensorOperation> Parse() override {
return std::make_shared<transforms::PreBuiltOperation>(std::make_shared<mindspore::dataset::test::NoOp>());
}
private:
struct Data;
std::shared_ptr<Data> data_;
};
class ThreeToOneTransform final : public TensorTransform {
public:
explicit ThreeToOneTransform() {}
~ThreeToOneTransform() = default;
protected:
std::shared_ptr<TensorOperation> Parse() override {
return std::make_shared<transforms::PreBuiltOperation>(std::make_shared<mindspore::dataset::test::ThreeToOneOp>());
}
private:
struct Data;
std::shared_ptr<Data> data_;
};
class OneToThreeTransform final : public TensorTransform {
public:
explicit OneToThreeTransform() {}
~OneToThreeTransform() = default;
protected:
std::shared_ptr<TensorOperation> Parse() override {
return std::make_shared<transforms::PreBuiltOperation>(std::make_shared<mindspore::dataset::test::OneToThreeOp>());
}
private:
struct Data;
std::shared_ptr<Data> data_;
};
} // namespace test
} // namespace dataset
} // namespace mindspore
// Shared gtest fixture for the pipeline tests in this file; no extra state needed
// beyond what UT::DatasetOpTesting provides (e.g. datasets_root_path_).
class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};
@ -2217,4 +2332,441 @@ TEST_F(MindDataTestPipeline, TestTFRecordZip) {
// Manually terminate the pipeline
iter->Stop();
}
}
// Feature: Test Repeat and Map with decode and resize ops on TFRecord
// Description: Iterate through dataset and count the number of rows and check the shape of the image data
// Expectation: There should be 6 rows in the dataset and shape is {30,30}
TEST_F(MindDataTestPipeline, TestTFRecordDecodeRepeatResize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline.TestTFRecordDecodeRepeatResize";
// Create a TFRecord Dataset (3 images)
std::string file_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
std::shared_ptr<Dataset> ds = TFRecord({file_path}, "", {"image", "label"});
EXPECT_NE(ds, nullptr);
// Create a Repeat operation on ds
int32_t repeat_num = 2;
ds = ds->Repeat(repeat_num);
EXPECT_NE(ds, nullptr);
// Create objects for the tensor ops: decode the raw bytes, then resize to 30x30
std::vector<int32_t> size = {30,30};
std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>();
std::shared_ptr<TensorTransform> resize_op = std::make_shared<vision::Resize>(size);
EXPECT_NE(decode_op, nullptr);
EXPECT_NE(resize_op, nullptr);
// Create a Map operation on ds
// {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
ds = ds->Map({decode_op, resize_op}, {}, {}, {"image"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
// 'label' is dropped during the project op
EXPECT_EQ(row.find("label"), row.end());
// 'image' column should still exist
EXPECT_NE(row.find("image"), row.end());
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
// every image must have been resized to 30x30
EXPECT_EQ(image.Shape()[0], 30);
EXPECT_EQ(image.Shape()[1], 30);
ASSERT_OK(iter->GetNextRow(&row));
}
// 3 images repeated twice -> 6 rows
EXPECT_EQ(i, 6);
// Manually terminate the pipeline
iter->Stop();
}
// Feature: Test Batch on TFRecord
// Description: Iterate through dataset, count the number of rows and verify the data in the row
// Expectation: There should be 1 row in the dataset and the data should be the expected data
TEST_F(MindDataTestPipeline, TestBatch) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatch.";
// Create a TFRecord Dataset
std::string file_path = datasets_root_path_ + "/testBatchDataset/test.data";
std::vector<std::string> files = {file_path};
std::shared_ptr<Dataset> ds = TFRecord(files, nullptr, {}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
// Create a Batch operation on ds; batch size equals the dataset size, so a single batch results
int32_t batch_size = 12;
ds = ds->Batch(batch_size);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
// Expected values of col_sint64: INT64_MIN, 1..10, INT64_MAX
std::vector<int64_t> data = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto this_row = row["col_sint64"];
// Raw buffer of the batched column: 12 contiguous int64 values
auto value = this_row.Data();
int64_t *p = (int64_t *)value.get();
for (size_t j = 0; j < data.size(); j++) {
EXPECT_EQ(p[j], data[j]);
}
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline
iter->Stop();
}
// Helper for TestRepeatBatchDrop: builds TFRecord -> Repeat(2) -> Batch(7, drop)
// and verifies the pipeline yields `expected_rows` batches.
//   drop: whether the final partial batch is dropped by the Batch op
//   expected_rows: number of batches the iterator should produce
//   datasets_root_path: root directory holding the test datasets
// Note: the path was previously taken by value, copying the string on every
// call; pass by const reference instead (performance-unnecessary-value-param).
void TestRepeatBatch(bool drop, uint64_t expected_rows, const std::string &datasets_root_path) {
  // Create a TFRecord Dataset
  std::string file_path = datasets_root_path + "/testBatchDataset/test.data";
  std::shared_ptr<Dataset> ds = TFRecord({file_path});
  EXPECT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 7;
  ds = ds->Batch(batch_size, drop);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and count the batches it yields
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(i, expected_rows);

  // Manually terminate the pipeline
  iter->Stop();
}
// Feature: Test Repeat and Batch on TFRecord
// Description: Apply repeat then batch with drop on and off, count rows in the dataset
// Expectation: The number of rows should equal the expected number of rows
TEST_F(MindDataTestPipeline, TestRepeatBatchDrop) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatBatchDrop.";
// 12 rows x repeat 2 = 24 rows; batch 7 -> 3 full batches when dropping,
// 4 batches (3 full + remainder of 3) when keeping the remainder.
TestRepeatBatch(true, 3, datasets_root_path_);
TestRepeatBatch(false, 4, datasets_root_path_);
}
// Helper for TestBatchDropRepeat: builds TFRecord -> Batch(7, drop) -> Repeat(2)
// and verifies the pipeline yields `expected_rows` batches.
//   drop: whether the final partial batch is dropped by the Batch op
//   expected_rows: number of batches the iterator should produce
//   datasets_root_path: root directory holding the test datasets
// Note: the path was previously taken by value, copying the string on every
// call; pass by const reference instead (performance-unnecessary-value-param).
void TestBatchRepeat(bool drop, uint64_t expected_rows, const std::string &datasets_root_path) {
  // Create a TFRecord Dataset
  std::string file_path = datasets_root_path + "/testBatchDataset/test.data";
  std::shared_ptr<Dataset> ds = TFRecord({file_path});
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 7;
  ds = ds->Batch(batch_size, drop);
  EXPECT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and count the batches it yields
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }
  EXPECT_EQ(i, expected_rows);

  // Manually terminate the pipeline
  iter->Stop();
}
// Feature: Test Batch and Repeat on TFRecord
// Description: Apply batch then repeat with drop on and off, count rows in the dataset
// Expectation: The number of rows should equal the expected number of rows
TEST_F(MindDataTestPipeline, TestBatchDropRepeat) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatchDropRepeat.";
// 12 rows, batch 7 -> 1 full batch when dropping, 2 batches otherwise;
// Repeat(2) above Batch doubles either count.
TestBatchRepeat(true, 2, datasets_root_path_);
TestBatchRepeat(false, 4, datasets_root_path_);
}
// Feature: Test Map on TFRecord
// Description: Apply Map with a TensorOp that does nothing but swap the input column with the output column
// Expectation: "image" column is replaced with "X"
TEST_F(MindDataTestPipeline, TestMap) {
MS_LOG(INFO) << "Doing MindDataTestPipeline.TestMap";
// Create a TFRecord Dataset
std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data";
std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"},
0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
// Create objects for the tensor ops
std::shared_ptr<TensorTransform> no_op = std::make_shared<mindspore::dataset::test::NoTransform>();
EXPECT_NE(no_op, nullptr);
// Create a Map operation on ds: consume "image", emit it under the new name "X"
ds = ds->Map({no_op}, {"image"}, {"X"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
while (row.size() != 0) {
// "image" is renamed to "X"; the untouched columns survive
EXPECT_EQ(row.find("image"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("X"), row.end());
EXPECT_NE(row.find("A"), row.end());
EXPECT_NE(row.find("B"), row.end());
ASSERT_OK(iter->GetNextRow(&row));
}
// Manually terminate the pipeline
iter->Stop();
}
// Feature: Test Map on TFRecord
// Description: Apply Map with a TensorOp that swaps 3 input columns with 1 output column
// Expectation: "image", "A", "B" are replaced with "X"
TEST_F(MindDataTestPipeline, Test3to1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline.Test3to1";
// Create a TFRecord Dataset
std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data";
std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"},
0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
// Create objects for the tensor ops
std::shared_ptr<TensorTransform> three_to_one_op = std::make_shared<mindspore::dataset::test::ThreeToOneTransform>();
EXPECT_NE(three_to_one_op, nullptr);
// Create a Map operation on ds: consume "image", "A", "B"; emit the single column "X"
ds = ds->Map({three_to_one_op}, {"image", "A", "B"}, {"X"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
while (row.size() != 0) {
// the three consumed columns are gone; only "X" and untouched "label" remain
EXPECT_EQ(row.find("image"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("X"), row.end());
EXPECT_EQ(row.find("A"), row.end());
EXPECT_EQ(row.find("B"), row.end());
ASSERT_OK(iter->GetNextRow(&row));
}
// Manually terminate the pipeline
iter->Stop();
}
// Feature: Test Map on TFRecord
// Description: Apply Map with a TensorOp that swaps 1 input column with 3 output columns
// Expectation: "image" is replaced with "X", "Y", "Z"
TEST_F(MindDataTestPipeline, Test1to3) {
MS_LOG(INFO) << "Doing MindDataTestPipeline.Test1to3";
// Create a TFRecord Dataset
std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data";
std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"},
0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
// Create objects for the tensor ops
std::shared_ptr<TensorTransform> one_to_three_op = std::make_shared<mindspore::dataset::test::OneToThreeTransform>();
EXPECT_NE(one_to_three_op, nullptr);
// Create a Map operation on ds: consume "image", emit "X", "Y", "Z";
// the last argument fixes the output column order of the projected row
ds = ds->Map({one_to_three_op}, {"image"}, {"X", "Y", "Z"}, {"X", "Y", "Z", "label", "A", "B"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
while (row.size() != 0) {
EXPECT_EQ(row.find("image"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_NE(row.find("A"), row.end());
EXPECT_NE(row.find("B"), row.end());
EXPECT_NE(row.find("X"), row.end());
EXPECT_NE(row.find("Y"), row.end());
EXPECT_NE(row.find("Z"), row.end());
// X/Y/Z are three copies of the image tensor, so they share its shape
EXPECT_EQ(row["X"].Shape(), std::vector<int64_t>({3, 4, 2}));
EXPECT_EQ(row["Y"].Shape(), std::vector<int64_t>({3, 4, 2}));
EXPECT_EQ(row["Z"].Shape(), std::vector<int64_t>({3, 4, 2}));
EXPECT_EQ(row["A"].Shape(), std::vector<int64_t>({1, 13, 14, 12}));
EXPECT_EQ(row["B"].Shape(), std::vector<int64_t>({9}));
ASSERT_OK(iter->GetNextRow(&row));
}
// Manually terminate the pipeline
iter->Stop();
}
// Feature: Test Map on TFRecord
// Description: Apply 3to1 and then 1to3 to replace 3 input columns with 3 output columns
// Expectation: "image", "A", "B" are replaced with "X", "Y", "Z"
TEST_F(MindDataTestPipeline, TestMultiTensorOp) {
MS_LOG(INFO) << "Doing MindDataTestPipeline.TestMultiTensorOp";
// Create a TFRecord Dataset
std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data";
std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"},
0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
// Create objects for the tensor ops
std::shared_ptr<TensorTransform> three_to_one_op = std::make_shared<mindspore::dataset::test::ThreeToOneTransform>();
std::shared_ptr<TensorTransform> one_to_three_op = std::make_shared<mindspore::dataset::test::OneToThreeTransform>();
EXPECT_NE(one_to_three_op, nullptr);
EXPECT_NE(three_to_one_op, nullptr);
// Create a Map operation on ds chaining both ops: 3 inputs -> 1 -> 3 outputs
ds = ds->Map({three_to_one_op, one_to_three_op}, {"image", "A", "B"}, {"X", "Y", "Z"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
while (row.size() != 0) {
// the three consumed columns are gone; X/Y/Z carry the image tensor's shape
EXPECT_EQ(row.find("image"), row.end());
EXPECT_NE(row.find("label"), row.end());
EXPECT_EQ(row.find("A"), row.end());
EXPECT_EQ(row.find("B"), row.end());
EXPECT_NE(row.find("X"), row.end());
EXPECT_NE(row.find("Y"), row.end());
EXPECT_NE(row.find("Z"), row.end());
EXPECT_EQ(row["X"].Shape(), std::vector<int64_t>({3, 4, 2}));
EXPECT_EQ(row["Y"].Shape(), std::vector<int64_t>({3, 4, 2}));
EXPECT_EQ(row["Z"].Shape(), std::vector<int64_t>({3, 4, 2}));
ASSERT_OK(iter->GetNextRow(&row));
}
// Manually terminate the pipeline
iter->Stop();
}
// Feature: Test Repeat and Map on TFRecord
// Description: Apply Map with NoOp and Repeat with num_repeats=3, iterate through dataset and count rows
// Expectation: There should be 30 rows in the dataset (10 rows repeated 3 times)
TEST_F(MindDataTestPipeline, TestTFReaderRepeatMap) {
MS_LOG(INFO) << "Doing MindDataTestPipeline.TestTFReaderRepeatMap";
// Create a TFRecord Dataset
std::string data_file = datasets_root_path_ + "/testDataset2/testDataset2.data";
std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label", "A", "B"},
0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
// Create objects for the tensor ops
std::shared_ptr<TensorTransform> no_op = std::make_shared<mindspore::dataset::test::NoTransform>();
EXPECT_NE(no_op, nullptr);
// Create a Map operation on ds; "label" is passed through unchanged
ds = ds->Map({no_op}, {"label"}, {});
EXPECT_NE(ds, nullptr);
// Create a Repeat operation on ds
int32_t repeat_num = 3;
ds = ds->Repeat(repeat_num);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// iterate over the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));
uint64_t i = 0;
while (row.size() != 0) {
i++;
ASSERT_OK(iter->GetNextRow(&row));
}
EXPECT_EQ(i, 30);
// Manually terminate the pipeline
iter->Stop();
}

View File

@ -475,6 +475,53 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic7) {
GlobalContext::config_manager()->set_seed(curr_seed);
}
// Feature: Test Repeat and Shuffle on RandomData
// Description: Apply operations, iterate through dataset and count rows
// Expectation: There should be 30 rows in the dataset
TEST_F(MindDataTestPipeline, TestRandomDatasetBasic8) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetBasic8.";
  // Create a RandomDataset with a fixed seed so the run is reproducible.
  // Use standard uint32_t (the original u_int32_t is a non-portable POSIX typedef).
  uint32_t curr_seed = GlobalContext::config_manager()->seed();
  GlobalContext::config_manager()->set_seed(246);
  std::string SCHEMA_FILE = datasets_root_path_ + "/testRandomData/datasetSchema2.json";
  std::shared_ptr<Dataset> ds = RandomData(10, SCHEMA_FILE);
  EXPECT_NE(ds, nullptr);
  // Create a Shuffle operation on ds
  int32_t shuffle_size = 4;
  ds = ds->Shuffle(shuffle_size);
  EXPECT_NE(ds, nullptr);
  // Create a Repeat operation on ds
  int32_t repeat_num = 3;
  ds = ds->Repeat(repeat_num);
  EXPECT_NE(ds, nullptr);
  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  // Check if RandomData() read correct columns
  uint64_t i = 0;
  while (row.size() != 0) {
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }
  // 10 generated rows x 3 repeats.
  EXPECT_EQ(i, 30);
  // Manually terminate the pipeline, then restore the global seed for later tests.
  iter->Stop();
  GlobalContext::config_manager()->set_seed(curr_seed);
}
TEST_F(MindDataTestPipeline, TestRandomDatasetUInt8) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetUInt8.";

View File

@ -520,3 +520,69 @@ TEST_F(MindDataTestPipeline, TestIncorrectTFrecordFile) {
auto itr = ds->CreateIterator();
EXPECT_EQ(itr, nullptr);
}
// Feature: Test TFRecord with a schema file
// Description: Create TFRecord with datasetSchema1Row.json
// Expectation: There should be 1 row in the dataset
TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic1Row) {
  // Log the actual test name (previously logged the generic "TestTFRecordDatasetBasic").
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordDatasetBasic1Row.";
  // Create a TFRecord Dataset; the schema restricts the read to a single row.
  std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json";
  std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path, {}, 0);
  EXPECT_NE(ds, nullptr);
  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  // Iterate the dataset and count the rows.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }
  EXPECT_EQ(i, 1);
  // Manually terminate the pipeline
  iter->Stop();
}
// Feature: Test TFRecord with a schema file
// Description: Create TFRecord with datasetSchema7Rows.json
// Expectation: There should be 7 rows in the dataset
TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic7Row) {
  // Log the actual test name (previously logged the generic "TestTFRecordDatasetBasic").
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordDatasetBasic7Row.";
  // Create a TFRecord Dataset; the schema restricts the read to 7 rows.
  std::string file_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  std::string schema_path = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json";
  std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path, {}, 0);
  EXPECT_NE(ds, nullptr);
  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);
  // Iterate the dataset and count the rows.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }
  EXPECT_EQ(i, 7);
  // Manually terminate the pipeline
  iter->Stop();
}

View File

@ -214,229 +214,6 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestConcurrencyRequest) {
ASSERT_TRUE(rc.IsOk());
}
// Simple test with a repeated cache op over random data producer
//
// RepeatOp
// |
// CacheOp
// |
// RandomDataOp
//
// Execution-tree test of CacheOp: builds RepeatOp -> CacheOp -> RandomDataOp,
// caches 50 generated rows (cache memory size 0, spilling enabled), repeats
// 4 times and expects 4 x 50 = 200 rows back.
TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) {
  // Clear the rc of the master thread if any
  (void)TaskManager::GetMasterThreadRc();
  Status rc;
  int32_t rank = 0;  // not used
  // Cache session id comes from the environment — presumably a cache server must be running; verify before enabling.
  session_id_type env_session;
  rc = GetSessionFromEnv(&env_session);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "UT test TestRandomDataCache1";
  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();
  // Create a schema using the C api's
  std::unique_ptr<DataSchema> test_schema = std::make_unique<DataSchema>();
  // 2 columns. First column is an "image" 640,480,3
  TensorShape c1Shape({640, 480, 3});
  ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible,
                   rank,  // not used
                   &c1Shape);
  // Column 2 will just be a scalar label number
  TensorShape c2Shape({});  // empty shape is a 1-value scalar Tensor
  ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape);
  test_schema->AddColumn(c1);
  test_schema->AddColumn(c2);
  // RandomDataOp: 4 workers generating 50 rows that match the schema above.
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(4, op_connector_size, 50, std::move(test_schema));
  rc = myTree->AssociateNode(myRandomDataOp);
  ASSERT_TRUE(rc.IsOk());
  // CacheOp
  // size of 0, spilling is true
  CacheClient::Builder builder;
  builder.SetSessionId(env_session).SetCacheMemSz(0).SetSpill(true);
  std::shared_ptr<CacheClient> myClient;
  rc = builder.Build(&myClient);
  ASSERT_TRUE(rc.IsOk());
  int64_t num_samples = 0;  // 0 means sample the entire set of data
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  std::shared_ptr<CacheOp> myCacheOp =
    std::make_shared<CacheOp>(5, op_connector_size, myClient, std::move(seq_sampler));
  ASSERT_NE(myCacheOp, nullptr);
  rc = myTree->AssociateNode(myCacheOp);
  ASSERT_TRUE(rc.IsOk());
  // RepeatOp
  uint32_t num_repeats = 4;
  std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(num_repeats);
  rc = myTree->AssociateNode(myRepeatOp);
  ASSERT_TRUE(rc.IsOk());
  // Assign tree relations and root.
  // Repeat counts are set on each child before wiring it in.
  myCacheOp->SetTotalRepeats(num_repeats);
  myCacheOp->SetNumRepeatsPerEpoch(num_repeats);
  rc = myRepeatOp->AddChild(myCacheOp);
  ASSERT_TRUE(rc.IsOk());
  // Always set to 1 under a CacheOp because we read from it only once. The CacheOp is the one that repeats.
  myRandomDataOp->SetTotalRepeats(1);
  myRandomDataOp->SetNumRepeatsPerEpoch(1);
  rc = myCacheOp->AddChild(myRandomDataOp);
  ASSERT_TRUE(rc.IsOk());
  rc = myTree->AssignRoot(myRepeatOp);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  // quick check to see what tree looks like
  std::ostringstream ss;
  ss << *myTree;  // some funny const error if I try to write directly to ms log stream
  MS_LOG(INFO) << "Here's the tree:\n" << ss.str();
  std::cout << *myClient << std::endl;
  rc = myTree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  ASSERT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    // Don't display these rows, just count them
    MS_LOG(INFO) << "Row fetched #: " << rowCount;
    rc = dI.FetchNextTensorRow(&tensorList);
    ASSERT_TRUE(rc.IsOk());
    rowCount++;
  }
  // 50 cached rows x 4 repeats.
  ASSERT_EQ(rowCount, 200);
  rc = myClient->DestroyCache();
  ASSERT_TRUE(rc.IsOk());
}
// Simple test with a repeated cache op over random data producer.
// This one will exceed memory and require a spill.
//
// RepeatOp
// |
// CacheOp
// |
// RandomDataOp
//
// Execution-tree test of CacheOp's spill path: RepeatOp -> CacheOp -> RandomDataOp
// with only 10 rows but the cache memory size capped at 4 so the 640x480x3 rows
// overflow memory and must spill to disk. Expects 10 x 4 repeats = 40 rows.
// NOTE(review): the units of SetCacheMemSz are not visible here — confirm in the CacheClient API.
TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) {
  // Clear the rc of the master thread if any
  (void)TaskManager::GetMasterThreadRc();
  Status rc;
  int32_t rank = 0;  // not used
  MS_LOG(INFO) << "UT test TestRandomDataCacheSpill";
  // Cache session id comes from the environment — presumably a cache server must be running; verify before enabling.
  session_id_type env_session;
  rc = GetSessionFromEnv(&env_session);
  ASSERT_TRUE(rc.IsOk());
  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();
  // Create a schema using the C api's
  std::unique_ptr<DataSchema> test_schema = std::make_unique<DataSchema>();
  // 2 columns. First column is an "image" 640,480,3
  TensorShape c1Shape({640, 480, 3});
  ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible,
                   rank,  // not used
                   &c1Shape);
  // Column 2 will just be a scalar label number
  TensorShape c2Shape({});  // empty shape is a 1-value scalar Tensor
  ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape);
  test_schema->AddColumn(c1);
  test_schema->AddColumn(c2);
  // RandomDataOp: 4 workers generating 10 rows that match the schema above.
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(test_schema));
  rc = myTree->AssociateNode(myRandomDataOp);
  ASSERT_TRUE(rc.IsOk());
  // CacheOp
  int64_t num_samples = 0;  // 0 means sample the entire set of data
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  // Small cache memory size with spilling enabled, so cached rows overflow to disk.
  CacheClient::Builder builder;
  builder.SetSessionId(env_session).SetCacheMemSz(4).SetSpill(true);
  std::shared_ptr<CacheClient> myClient;
  rc = builder.Build(&myClient);
  ASSERT_TRUE(rc.IsOk());
  std::shared_ptr<CacheOp> myCacheOp =
    std::make_shared<CacheOp>(4, op_connector_size, myClient, std::move(seq_sampler));
  ASSERT_NE(myCacheOp, nullptr);
  rc = myTree->AssociateNode(myCacheOp);
  ASSERT_TRUE(rc.IsOk());
  // RepeatOp
  uint32_t num_repeats = 4;
  std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(num_repeats);
  rc = myTree->AssociateNode(myRepeatOp);
  ASSERT_TRUE(rc.IsOk());
  // Assign tree relations and root.
  // Repeat counts are set on each child before wiring it in.
  myCacheOp->SetTotalRepeats(num_repeats);
  myCacheOp->SetNumRepeatsPerEpoch(num_repeats);
  rc = myRepeatOp->AddChild(myCacheOp);
  ASSERT_TRUE(rc.IsOk());
  // Always set to 1 under a CacheOp because we read from it only once. The CacheOp is the one that repeats.
  myRandomDataOp->SetTotalRepeats(1);
  myRandomDataOp->SetNumRepeatsPerEpoch(1);
  rc = myCacheOp->AddChild(myRandomDataOp);
  ASSERT_TRUE(rc.IsOk());
  rc = myTree->AssignRoot(myRepeatOp);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  std::cout << *myClient << std::endl;
  rc = myTree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  ASSERT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    // Don't display these rows, just count them
    MS_LOG(INFO) << "Row fetched #: " << rowCount;
    rc = dI.FetchNextTensorRow(&tensorList);
    ASSERT_TRUE(rc.IsOk());
    rowCount++;
  }
  // 10 cached rows x 4 repeats.
  ASSERT_EQ(rowCount, 40);
  rc = myClient->DestroyCache();
  ASSERT_TRUE(rc.IsOk());
}
TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) {
// Clear the rc of the master thread if any
(void)TaskManager::GetMasterThreadRc();

View File

@ -1,210 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/datasetops/source/manifest_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file,
std::string usage = "train", std::shared_ptr<SamplerRT> sampler = nullptr,
std::map<std::string, int32_t> map = {}, bool decode = false) {
if (sampler == nullptr) {
const int64_t num_samples = 0;
const int64_t start_index = 0;
sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
}
auto schema = std::make_unique<DataSchema>();
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1));
return std::make_shared<ManifestOp>(num_works, file, conns, decode, map, std::move(schema), std::move(sampler),
usage);
}
// Test fixture for the ManifestOp tests below; inherits common dataset-test
// setup (e.g. datasets_root_path_) from UT::DatasetOpTesting.
class MindDataTestManifest : public UT::DatasetOpTesting {
 protected:
};
// Feature: ManifestOp combined with RepeatOp.
// Description: Read the 2-row manifest sequentially and repeat it twice.
// Expectation: 4 rows total, labels cycling 0, 1, 0, 1.
TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  auto op1 = Manifest(16, 2, 32, file);
  auto op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  auto tree = Build({op1, op2});
  tree->Prepare();
  // Golden labels for the 4 expected rows.
  uint32_t res[] = {0, 1, 0, 1};
  constexpr uint64_t kExpectedRows = sizeof(res) / sizeof(res[0]);
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // Guard: never index past the golden array even if the pipeline yields extra rows.
      ASSERT_TRUE(i < kExpectedRows);
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_EQ(res[i], static_cast<uint32_t>(label));
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, kExpectedRows);
  }
}
// Reads the manifest through a SubsetRandomSampler restricted to index {1} and
// verifies exactly one row comes back, carrying label 1.
TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) {
  std::vector<int64_t> subset_indices({1});
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> subset_sampler = std::make_shared<SubsetRandomSamplerRT>(subset_indices, num_samples);
  std::string manifest_file = datasets_root_path_ + "/testManifestData/cpp.json";
  // Expect 6 samples for label 0 and 1
  auto tree = Build({Manifest(16, 2, 32, manifest_file, "train", std::move(subset_sampler))});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap row;
    rc = di.GetNextAsMap(&row);
    EXPECT_TRUE(rc.IsOk());
    uint64_t fetched = 0;
    int32_t label = 0;
    while (!row.empty()) {
      row["label"]->GetItemAt<int32_t>(&label, {});
      fetched++;
      ASSERT_OK(di.GetNextAsMap(&row));
      EXPECT_EQ(label, 1);
    }
    EXPECT_TRUE(fetched == 1);
  }
}
// Feature: ManifestOp user-supplied class index.
// Description: Map cat->111 and dog->222 (plus one name absent from the data)
// and read all rows sequentially.
// Expectation: 2 rows with labels remapped to 111 then 222; the unknown name is skipped.
TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  std::map<std::string, int32_t> map;
  map["cat"] = 111;  // forward slash is not good, but we need to add this somewhere, also in windows, its a '\'
  map["dog"] = 222;  // forward slash is not good, but we need to add this somewhere, also in windows, its a '\'
  map["wrong folder name"] = 1234;  // this is skipped
  auto tree = Build({Manifest(16, 2, 32, file, "train", nullptr, map)});
  // Golden remapped labels for the 2 expected rows.
  uint64_t res[2] = {111, 222};
  constexpr uint64_t kExpectedRows = sizeof(res) / sizeof(res[0]);
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // Guard: never index past the golden array even if the pipeline yields extra rows.
      ASSERT_TRUE(i < kExpectedRows);
      tensor_map["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_EQ(static_cast<uint64_t>(label), res[i]);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, kExpectedRows);
  }
}
// Takes a single sequential sample from the "train" split and repeats it 4
// times; every fetched row must carry label 0 and exactly 4 rows must appear.
TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto op1 = Manifest(16, 2, 32, file, "train", std::move(seq_sampler), {});
  auto op2 = Repeat(4);
  op1->SetTotalRepeats(4);
  op1->SetNumRepeatsPerEpoch(4);
  auto tree = Build({op1, op2});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap row;
    ASSERT_OK(di.GetNextAsMap(&row));
    EXPECT_TRUE(rc.IsOk());
    uint64_t fetched = 0;
    int32_t label = 0;
    while (!row.empty()) {
      row["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_TRUE(0 == label);
      MS_LOG(DEBUG) << "row: " << fetched << "\t" << row["image"]->shape() << "label:" << label << "\n";
      fetched++;
      ASSERT_OK(di.GetNextAsMap(&row));
    }
    EXPECT_TRUE(fetched == 4);
  }
}
// Takes a single sequential sample from the "eval" split and verifies exactly
// one row is produced, carrying label 0.
TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})});
  tree->Prepare();
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap row;
    ASSERT_OK(di.GetNextAsMap(&row));
    EXPECT_TRUE(rc.IsOk());
    uint64_t fetched = 0;
    int32_t label = 0;
    while (!row.empty()) {
      row["label"]->GetItemAt<int32_t>(&label, {});
      EXPECT_TRUE(0 == label);
      MS_LOG(DEBUG) << "row: " << fetched << "\t" << row["image"]->shape() << "label:" << label << "\n";
      fetched++;
      ASSERT_OK(di.GetNextAsMap(&row));
    }
    EXPECT_TRUE(fetched == 1);
  }
}

View File

@ -1,752 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <memory>
#include <vector>
#include "common/common.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "minddata/dataset/kernels/image/decode_op.h"
#include "minddata/dataset/kernels/image/resize_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/kernels/tensor_op.h"
#include "utils/log_adapter.h"
using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::MsLogLevel::INFO;
namespace mindspore {
namespace dataset {
namespace test {
// Pass-through TensorOp used by the MapOp tests: forwards its single input
// tensor unchanged.
class NoOp : public TensorOp {
 public:
  NoOp(){};
  ~NoOp(){};
  Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override {
    *output = std::move(input);
    return Status::OK();
  };
  void Print(std::ostream &out) const override { out << "NoOp"; };
  std::string Name() const override { return kNoOp; }
};
// TensorOp declaring three input columns and producing one output: it forwards
// the first input tensor and drops the other two.
class ThreeToOneOp : public TensorOp {
 public:
  ThreeToOneOp(){};
  ~ThreeToOneOp(){};
  uint32_t NumInput() override { return 3; }
  // Compute function that holds the actual implementation of the operation.
  Status Compute(const TensorRow &input, TensorRow *output) override {
    output->push_back(input[0]);
    return Status::OK();
  };
  void Print(std::ostream &out) const override { out << "ThreeToOneOp"; };
  std::string Name() const override { return "ThreeToOneOp"; }
};
// TensorOp declaring one input column and producing three outputs. All three
// outputs are the SAME shared_ptr, i.e. they alias one underlying tensor.
class OneToThreeOp : public TensorOp {
 public:
  OneToThreeOp(){};
  ~OneToThreeOp(){};
  uint32_t NumOutput() override { return 3; }
  // Compute function that holds the actual implementation of the operation.
  // Simply pushing the same shared pointer of the first element of input vector three times.
  Status Compute(const TensorRow &input, TensorRow *output) override {
    output->push_back(input[0]);
    output->push_back(input[0]);
    output->push_back(input[0]);
    return Status::OK();
  };
  void Print(std::ostream &out) const override { out << "OneToThreeOp"; };
  std::string Name() const override { return "OneToThreeOp"; };
};
}  // namespace test
}  // namespace dataset
}  // namespace mindspore
// Test fixture for the MapOp tests: provides a fresh, empty ExecutionTree per
// test plus a helper that builds the TFReaderOp used as the pipeline source.
class MindDataTestMapOp : public UT::DatasetOpTesting {
 public:
  void SetUp() override {
    DatasetOpTesting::SetUp();
    dataset_path_ = datasets_root_path_ + "" + "/testDataset2/testDataset2.data";
    schema_path_ = datasets_root_path_ + "" + "/testDataset2/datasetSchema.json";
    GlobalInit();
    // Start with an empty execution tree
    my_tree_ = std::make_shared<ExecutionTree>();
  }
  // Builds a TFReaderOp over testDataset2 loading columns image/label/A/B.
  // NOTE(review): the positional constructor arguments below are opaque here —
  // confirm their meaning against TFReaderOp's declaration before changing any.
  std::shared_ptr<TFReaderOp> CreateTFReaderOp() {
    std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
    auto op_connector_size = config_manager->op_connector_size();
    std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
    std::vector<std::string> columns_to_load = {"image", "label", "A", "B"};
    (void)schema->LoadSchemaFile(schema_path_, columns_to_load);
    std::vector<std::string> files = {dataset_path_};
    std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>(
      1, 2, 0, files, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false);
    (void)my_tfreader_op->Init();
    return my_tfreader_op;
  }
  // Shared execution tree, rebuilt in SetUp for every test.
  std::shared_ptr<ExecutionTree> my_tree_;
 private:
  std::string dataset_path_;
  std::string schema_path_;
};
std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path,
bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr,
std::map<std::string, int32_t> map = {}, bool decode = false) {
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
TensorShape scalar = TensorShape::CreateScalar();
(void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
(void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar));
std::set<std::string> ext = {".jpg", ".JPEG"};
if (sampler == nullptr) {
int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data
int64_t start_index = 0;
sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
}
std::shared_ptr<ImageFolderOp> so =
std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler);
return so;
}
// TestAsMap scenario:
// TFReaderOp reads a dataset that have column ordering |image|label|A|B|.
// A TensorOp that does nothing picks the "image" column and produces a column named "X".
// Thus, based on the new MapOp behaviour, the column ordering will be |X|label|A|B|.
// Verify that the "image" column is removed and "X" column is added.
// Verifies MapOp column renaming: "image" is consumed and replaced by "X",
// while label/A/B pass through untouched, leaving 4 columns total.
TEST_F(MindDataTestMapOp, TestAsMap) {
  Status rc;
  MS_LOG(INFO) << "Doing TestAsMap.";
  // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows.
  auto tfreader = this->CreateTFReaderOp();
  rc = my_tree_->AssociateNode(tfreader);
  EXPECT_TRUE(rc.IsOk());
  // One pass-through op, mapping input column "image" onto new column "X".
  auto passthrough = std::make_shared<mindspore::dataset::test::NoOp>();
  std::vector<std::shared_ptr<TensorOp>> op_list;
  op_list.push_back(passthrough);
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto connector_size = cfg->op_connector_size();
  std::vector<std::string> input_cols = {"image"};
  std::vector<std::string> output_cols = {"X"};
  auto map_op = std::make_shared<MapOp>(input_cols, output_cols, std::move(op_list), 1, connector_size);
  rc = my_tree_->AssociateNode(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = map_op->AddChild(tfreader);
  EXPECT_TRUE(rc.IsOk());
  // Assign the tree root
  rc = my_tree_->AssignRoot(map_op);
  EXPECT_TRUE(rc.IsOk());
  // Now prepare the tree
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Pull the first row and check the resulting column set.
  DatasetIterator di(my_tree_);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  EXPECT_EQ(tensor_map.size(), 4);
  EXPECT_EQ(tensor_map.find("image"), tensor_map.end());
  EXPECT_NE(tensor_map.find("label"), tensor_map.end());
  EXPECT_NE(tensor_map.find("X"), tensor_map.end());
  EXPECT_NE(tensor_map.find("A"), tensor_map.end());
  EXPECT_NE(tensor_map.find("B"), tensor_map.end());
}
// Test3to1 scenario:
// TFReaderOp reads a dataset that have column ordering |image|label|A|B|.
// A 3-to-1 TensorOp picks the columns [image, A, B] and produce a column named "X".
// Thus, based on the new MapOp behaviour, the column ordering will be |X|label|.
// Verify that the only columns "X" and "label" exist.
// Verifies a 3-to-1 MapOp: input columns image/A/B collapse into single output
// column "X", so every row ends up with exactly the columns X and label.
TEST_F(MindDataTestMapOp, Test3to1) {
  Status rc;
  MS_LOG(INFO) << "Doing Test3to1.";
  // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows.
  auto tfreader = this->CreateTFReaderOp();
  rc = my_tree_->AssociateNode(tfreader);
  EXPECT_TRUE(rc.IsOk());
  auto reducer = std::make_shared<mindspore::dataset::test::ThreeToOneOp>();
  std::vector<std::shared_ptr<TensorOp>> op_list;
  op_list.push_back(reducer);
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto connector_size = cfg->op_connector_size();
  std::vector<std::string> input_cols = {"image", "A", "B"};
  std::vector<std::string> output_cols = {"X"};
  auto map_op = std::make_shared<MapOp>(input_cols, output_cols, std::move(op_list), 1, connector_size);
  rc = my_tree_->AssociateNode(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = map_op->AddChild(tfreader);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->AssignRoot(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Walk every row and confirm the expected column set.
  DatasetIterator di(my_tree_);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  while (!tensor_map.empty()) {
    EXPECT_EQ(tensor_map.size(), 2);
    EXPECT_EQ(tensor_map.find("image"), tensor_map.end());
    EXPECT_NE(tensor_map.find("label"), tensor_map.end());
    EXPECT_NE(tensor_map.find("X"), tensor_map.end());
    EXPECT_EQ(tensor_map.find("A"), tensor_map.end());
    EXPECT_EQ(tensor_map.find("B"), tensor_map.end());
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
  }
}
// Test1to3 scenario:
// TFReaderOp reads a dataset that have column ordering |image|label|A|B|.
// A 1-to-3 TensorOp picks the columns [image] and produce a column named [X, Y, Z].
// Thus, based on the new MapOp behaviour, the column ordering will be |X|Y|Z|label|A|B|.
// Verify that the only columns X, Y, Z are added (to the front) and followed by columns label, A, B..
// Exercises a 1-to-3 MapOp followed by a ProjectOp fixing the column order to
// X|Y|Z|label|A|B, then validates both name-based lookup and positional
// type/rank/shape of every column against golden values taken from the schema.
TEST_F(MindDataTestMapOp, Test1to3) {
  Status rc;
  MS_LOG(INFO) << "Doing Test1to3.";
  // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows.
  auto my_tfreader_op = this->CreateTFReaderOp();
  rc = my_tree_->AssociateNode(my_tfreader_op);
  EXPECT_TRUE(rc.IsOk());
  auto my_op = std::make_shared<mindspore::dataset::test::OneToThreeOp>();
  std::vector<std::shared_ptr<TensorOp>> my_func_list;
  my_func_list.push_back(my_op);
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  auto op_connector_size = config_manager->op_connector_size();
  std::vector<std::string> in_columns = {"image"};
  std::vector<std::string> out_columns = {"X", "Y", "Z"};
  std::shared_ptr<MapOp> my_map_op =
    std::make_shared<MapOp>(in_columns, out_columns, std::move(my_func_list), 1, op_connector_size);
  // ProjectOp pins the output column ordering for the positional checks below.
  std::vector<std::string> columns_to_project = {"X", "Y", "Z", "label", "A", "B"};
  std::shared_ptr<ProjectOp> my_project_op = std::make_shared<ProjectOp>(columns_to_project);
  rc = my_tree_->AssociateNode(my_project_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree_->AssignRoot(my_project_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree_->AssociateNode(my_map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_project_op->AddChild(my_map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_map_op->AddChild(my_tfreader_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree_);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  EXPECT_EQ(tensor_map.size(), 6);
  EXPECT_EQ(tensor_map.find("image"), tensor_map.end());
  EXPECT_NE(tensor_map.find("label"), tensor_map.end());
  EXPECT_NE(tensor_map.find("A"), tensor_map.end());
  EXPECT_NE(tensor_map.find("B"), tensor_map.end());
  EXPECT_NE(tensor_map.find("X"), tensor_map.end());
  EXPECT_NE(tensor_map.find("Y"), tensor_map.end());
  EXPECT_NE(tensor_map.find("Z"), tensor_map.end());
  // Getting the next row as vector (by position).
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  EXPECT_TRUE(rc.IsOk());
  // Based on the schema file, create the golden result to compare with.
  std::vector<DataType::Type> golden_types({DataType::Type::DE_UINT8, DataType::Type::DE_UINT8,
                                            DataType::Type::DE_UINT8, DataType::Type::DE_INT64,
                                            DataType::Type::DE_FLOAT32, DataType::Type::DE_INT64});
  std::vector<uint64_t> golden_ranks({3, 3, 3, 1, 4, 1});
  std::vector<TensorShape> golden_shapes({TensorShape({3, 4, 2}), TensorShape({3, 4, 2}), TensorShape({3, 4, 2}),
                                          TensorShape({7}), TensorShape({1, 13, 14, 12}), TensorShape({9})});
  while (!tensor_list.empty()) {
    for (uint32_t i = 0; i < tensor_list.size(); i++) {
      EXPECT_EQ(tensor_list[i]->type(), golden_types[i]);
      EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]);
      EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]);
      // Every column, including the X/Y/Z aliases of "image", must carry real data.
      EXPECT_NE(tensor_list[i]->GetBuffer(), nullptr);
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    EXPECT_TRUE(rc.IsOk());
  }
}
// TestMultiTensorOp scenario:
// TFReaderOp reads a dataset that have column ordering |image|label|A|B|.
// A series of 3-to-1 and 1-to-3 TensorOps are applied to [image, A, B] and
// produce final output columns [X, Y, Z].
// Based on the new MapOp behaviour, the column ordering will be |X|Y|Z|label|.
TEST_F(MindDataTestMapOp, TestMultiTensorOp) {
  Status rc;
  MS_LOG(INFO) << "Doing TestMultiTensorOp.";
  // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total of 10 rows.
  auto tfreader = this->CreateTFReaderOp();
  rc = my_tree_->AssociateNode(tfreader);
  EXPECT_TRUE(rc.IsOk());
  // Chain a 3-to-1 op followed by a 1-to-3 op inside one MapOp.
  std::vector<std::shared_ptr<TensorOp>> tensor_ops;
  tensor_ops.push_back(std::make_shared<mindspore::dataset::test::ThreeToOneOp>());
  tensor_ops.push_back(std::make_shared<mindspore::dataset::test::OneToThreeOp>());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto connector_size = cfg->op_connector_size();
  std::vector<std::string> in_cols = {"image", "A", "B"};
  std::vector<std::string> out_cols = {"X", "Y", "Z"};
  std::shared_ptr<MapOp> map_op =
    std::make_shared<MapOp>(in_cols, out_cols, std::move(tensor_ops), 1, connector_size);
  rc = my_tree_->AssociateNode(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = map_op->AddChild(tfreader);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->AssignRoot(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Walk the whole pipeline and verify the column layout of every row.
  DatasetIterator di(my_tree_);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  while (!tensor_map.empty()) {
    EXPECT_EQ(tensor_map.size(), 4);
    // The consumed input columns must be gone; label passes through untouched.
    EXPECT_EQ(tensor_map.find("image"), tensor_map.end());
    EXPECT_EQ(tensor_map.find("A"), tensor_map.end());
    EXPECT_EQ(tensor_map.find("B"), tensor_map.end());
    EXPECT_NE(tensor_map.find("label"), tensor_map.end());
    EXPECT_NE(tensor_map.find("X"), tensor_map.end());
    EXPECT_NE(tensor_map.find("Y"), tensor_map.end());
    EXPECT_NE(tensor_map.find("Z"), tensor_map.end());
    // XYZ are Tensor shared_ptr to image, so it should have the same shape as image column.
    EXPECT_EQ(tensor_map["X"]->shape(), TensorShape({3, 4, 2}));
    EXPECT_EQ(tensor_map["Y"]->shape(), TensorShape({3, 4, 2}));
    EXPECT_EQ(tensor_map["Z"]->shape(), TensorShape({3, 4, 2}));
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
  }
}
// Pipeline under test: Map(NoOp on "label") -> Repeat -> TFReader.
// Repeating the 10-row reader 3 times should yield 30 rows at the root.
TEST_F(MindDataTestMapOp, TestTFReaderRepeatMap) {
  Status rc;
  MS_LOG(INFO) << "Doing TestTFReaderRepeatMap.";
  uint32_t num_repeats = 3;
  // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total
  // of 10 rows.
  auto tfreader = this->CreateTFReaderOp();
  rc = my_tree_->AssociateNode(tfreader);
  EXPECT_TRUE(rc.IsOk());
  // A pass-through tensor op applied to the "label" column only.
  std::vector<std::shared_ptr<TensorOp>> tensor_ops;
  tensor_ops.push_back(std::make_shared<mindspore::dataset::test::NoOp>());
  std::shared_ptr<RepeatOp> repeat = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree_->AssociateNode(repeat);
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto connector_size = cfg->op_connector_size();
  std::vector<std::string> in_cols = {"label"};
  std::vector<std::string> out_cols = {};
  std::shared_ptr<MapOp> map_op =
    std::make_shared<MapOp>(in_cols, out_cols, std::move(tensor_ops), 5, connector_size);
  rc = my_tree_->AssociateNode(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = map_op->AddChild(repeat);
  EXPECT_TRUE(rc.IsOk());
  tfreader->SetTotalRepeats(num_repeats);
  tfreader->SetNumRepeatsPerEpoch(num_repeats);
  rc = repeat->AddChild(tfreader);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->AssignRoot(map_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Drain the pipeline and count the rows; each row still carries 4 columns.
  DatasetIterator di(my_tree_);
  TensorRow row;
  rc = di.FetchNextTensorRow(&row);
  EXPECT_TRUE(rc.IsOk());
  EXPECT_EQ(row.size(), 4);
  uint32_t row_count = 0;
  while (!row.empty()) {
    row_count++;
    MS_LOG(INFO) << "row_count: " << row_count << ".";
    rc = di.FetchNextTensorRow(&row);
    EXPECT_TRUE(rc.IsOk());
  }
  ASSERT_EQ(row_count, 10 * num_repeats);
}
// Pipeline under test: Repeat -> Map(NoOp on "label", 50 workers) -> TFReader.
// Same row math as TestTFReaderRepeatMap but with the Map below the Repeat.
TEST_F(MindDataTestMapOp, TestTFReaderMapRepeat) {
  Status rc;
  MS_LOG(INFO) << "Doing TestTFReaderMapRepeat.";
  uint32_t num_repeats = 3;
  // Note: The above TFReader config yields 5 buffers, each with 2 rows, for a total
  // of 10 rows.
  auto tfreader = this->CreateTFReaderOp();
  rc = my_tree_->AssociateNode(tfreader);
  EXPECT_TRUE(rc.IsOk());
  // Pass-through op over the "label" column.
  std::vector<std::shared_ptr<TensorOp>> tensor_ops;
  tensor_ops.push_back(std::make_shared<mindspore::dataset::test::NoOp>());
  std::shared_ptr<RepeatOp> repeat = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree_->AssociateNode(repeat);
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto connector_size = cfg->op_connector_size();
  std::vector<std::string> in_cols = {"label"};
  std::vector<std::string> out_cols = {};
  std::shared_ptr<MapOp> map_op =
    std::make_shared<MapOp>(in_cols, out_cols, std::move(tensor_ops), 50, connector_size);
  rc = my_tree_->AssociateNode(map_op);
  EXPECT_TRUE(rc.IsOk());
  map_op->SetTotalRepeats(num_repeats);
  map_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = repeat->AddChild(map_op);
  EXPECT_TRUE(rc.IsOk());
  tfreader->SetTotalRepeats(num_repeats);
  tfreader->SetNumRepeatsPerEpoch(num_repeats);
  rc = map_op->AddChild(tfreader);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->AssignRoot(repeat);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Count every row emitted by the repeated pipeline.
  DatasetIterator di(my_tree_);
  TensorRow row;
  rc = di.FetchNextTensorRow(&row);
  EXPECT_TRUE(rc.IsOk());
  EXPECT_EQ(row.size(), 4);
  uint32_t row_count = 0;
  while (!row.empty()) {
    row_count++;
    MS_LOG(INFO) << "row_count: " << row_count << ".";
    rc = di.FetchNextTensorRow(&row);
    EXPECT_TRUE(rc.IsOk());
  }
  ASSERT_EQ(row_count, 10 * num_repeats);
}
// Pipeline under test: Map(Resize) -> Repeat -> Map(Decode) -> TFReader.
// The 3-image dataset repeated twice must produce 6 rows of (image, label).
TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) {
  Status rc;
  MS_LOG(INFO) << "Doing TFReader_Decode_Repeat_Resize.";
  uint32_t num_repeats = 2;
  std::string dataset_path = datasets_root_path_ + "/" + "test_tf_file_3_images/train-0000-of-0001.data";
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  auto op_connector_size = config_manager->op_connector_size();
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  std::vector<std::string> columns_to_load = {"image", "label"};
  std::vector<std::string> files = {dataset_path};
  std::shared_ptr<TFReaderOp> my_tfreader_op = std::make_shared<TFReaderOp>(
    1, 2, 0, files, std::move(schema), op_connector_size, columns_to_load, false, 1, 0, false);
  // Bug fix: Init() returns a Status that was previously discarded with (void).
  rc = my_tfreader_op->Init();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->AssociateNode(my_tfreader_op);
  EXPECT_TRUE(rc.IsOk());
  auto decode_op = std::make_shared<DecodeOp>();
  std::vector<std::shared_ptr<TensorOp>> my_func_list;
  my_func_list.push_back(decode_op);
  std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree_->AssociateNode(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  std::vector<std::string> input_columns = {"image"};
  std::vector<std::string> output_columns = {};
  // Decode map: 4 workers, operates in place on the "image" column.
  std::shared_ptr<MapOp> my_map_decode_op =
    std::make_shared<MapOp>(input_columns, output_columns, std::move(my_func_list), 4, op_connector_size);
  rc = my_tree_->AssociateNode(my_map_decode_op);
  EXPECT_TRUE(rc.IsOk());
  // Resize map: 5 workers, rescales every decoded image to 300x300.
  auto resize_op = std::make_shared<ResizeOp>(300, 300);
  std::vector<std::shared_ptr<TensorOp>> my_func_list2;
  my_func_list2.push_back(resize_op);
  std::shared_ptr<MapOp> my_map_resize_op =
    std::make_shared<MapOp>(input_columns, output_columns, std::move(my_func_list2), 5, op_connector_size);
  rc = my_tree_->AssociateNode(my_map_resize_op);
  EXPECT_TRUE(rc.IsOk());
  // Wire the tree bottom-up and propagate the repeat counts below the RepeatOp.
  my_tfreader_op->SetTotalRepeats(num_repeats);
  my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = my_map_decode_op->AddChild(my_tfreader_op);
  EXPECT_TRUE(rc.IsOk());
  my_map_decode_op->SetTotalRepeats(num_repeats);
  my_map_decode_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = my_repeat_op->AddChild(my_map_decode_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_map_resize_op->AddChild(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->AssignRoot(my_map_resize_op);
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree_);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  EXPECT_TRUE(rc.IsOk());
  EXPECT_EQ(tensor_list.size(), 2);
  uint32_t row_count = 0;
  while (!tensor_list.empty()) {
    row_count++;
    rc = di.FetchNextTensorRow(&tensor_list);
    EXPECT_TRUE(rc.IsOk());
  }
  // 3 images x 2 repeats.
  ASSERT_EQ(row_count, 6);
}
// Runs the same ImageFolder -> Decode -> Repeat -> Resize pipeline twice with
// different worker counts and asserts both passes produce byte-identical images.
TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) {
  Status rc;
  MS_LOG(INFO) << "Doing ImageFolder_Decode_Repeat_Resize.";
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  uint32_t num_repeats = 2;
  // Bug fix: an EXPECT_TRUE(rc.IsOk()) that checked a default-constructed rc
  // here (and stale-rc checks after each ASSERT_OK below) were removed; they
  // asserted nothing about the calls they followed.
  std::shared_ptr<RepeatOp> repeat_op = std::make_shared<RepeatOp>(num_repeats);
  auto decode_op = std::make_shared<DecodeOp>();
  std::vector<std::shared_ptr<TensorOp>> func_list;
  func_list.push_back(decode_op);
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  int32_t num_parallel_workers = config_manager->num_parallel_workers();
  std::vector<std::string> input_columns = {"image"};
  std::vector<std::string> output_columns = {};
  std::shared_ptr<MapOp> map_decode_map =
    std::make_shared<MapOp>(input_columns, output_columns, func_list, 4, op_connector_size);
  auto resize_op = std::make_shared<ResizeOp>(300, 300);
  std::vector<std::shared_ptr<TensorOp>> func_list2;
  func_list2.push_back(resize_op);
  std::shared_ptr<MapOp> map_resize_op =
    std::make_shared<MapOp>(input_columns, output_columns, func_list2, 5, op_connector_size);
  auto image_folder_op = ImageFolder(num_parallel_workers, 2, 32, folder_path, false);
  image_folder_op->SetTotalRepeats(num_repeats);
  image_folder_op->SetNumRepeatsPerEpoch(num_repeats);
  map_decode_map->SetTotalRepeats(num_repeats);
  map_decode_map->SetNumRepeatsPerEpoch(num_repeats);
  my_tree_ = Build({image_folder_op, map_decode_map, repeat_op, map_resize_op});
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree_);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  int32_t img_class[] = {0, 1, 2, 3};
  std::string result;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
    EXPECT_TRUE(img_class[(i % 44) / 11] == label);
    // Dump all the image into string, to be used as a comparison later.
    result.append((char *)tensor_map["image"]->GetBuffer(), (int64_t)tensor_map["image"]->Size());
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    i++;
  }
  EXPECT_TRUE(i == 88);
  // Part-2 : creating mapop with performance mode = false, to check if the result is the same
  // as when performance mode = true.
  repeat_op = std::make_shared<RepeatOp>(num_repeats);
  map_decode_map = std::make_shared<MapOp>(input_columns, output_columns, func_list, 14, op_connector_size);
  map_resize_op = std::make_shared<MapOp>(input_columns, output_columns, func_list2, 15, op_connector_size);
  image_folder_op = ImageFolder(16, 2, 32, folder_path, false);
  image_folder_op->SetTotalRepeats(num_repeats);
  image_folder_op->SetNumRepeatsPerEpoch(num_repeats);
  map_decode_map->SetTotalRepeats(num_repeats);
  map_decode_map->SetNumRepeatsPerEpoch(num_repeats);
  auto my_tree_2 = Build({image_folder_op, map_decode_map, repeat_op, map_resize_op});
  rc = my_tree_2->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_2->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di2(my_tree_2);
  ASSERT_OK(di2.GetNextAsMap(&tensor_map));
  i = 0;
  label = 0;
  std::string result2;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
    EXPECT_TRUE(img_class[(i % 44) / 11] == label);
    result2.append((char *)tensor_map["image"]->GetBuffer(), (int64_t)tensor_map["image"]->Size());
    ASSERT_OK(di2.GetNextAsMap(&tensor_map));
    i++;
  }
  EXPECT_TRUE(i == 88);
  // Both passes must agree byte-for-byte.
  EXPECT_EQ(result.size(), result2.size());
  EXPECT_EQ(result, result2);
}
// Same ImageFolder -> Decode -> Repeat -> Resize pipeline as above, but the
// MapOps are built with an empty input-column list.
// NOTE(review): empty in_columns presumably makes MapOp pick its default
// column(s) — confirm against MapOp's documentation.
TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize_NoInputColumns) {
  Status rc;
  MS_LOG(INFO) << "Doing ImageFolder_Decode_Repeat_Resize_NoInputColumns.";
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  uint32_t num_repeats = 2;
  // Cleanup: stray empty statements (";") after several declarations, a stale
  // rc check after ASSERT_OK, and an unused local `result` were removed.
  std::shared_ptr<RepeatOp> repeat_op = std::make_shared<RepeatOp>(num_repeats);
  auto decode_op = std::make_shared<DecodeOp>();
  std::vector<std::shared_ptr<TensorOp>> func_list;
  func_list.push_back(decode_op);
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  auto op_connector_size = config_manager->op_connector_size();
  std::vector<std::string> input_columns = {};
  std::vector<std::string> output_columns = {};
  std::shared_ptr<MapOp> map_decode_map =
    std::make_shared<MapOp>(input_columns, output_columns, std::move(func_list), 4, op_connector_size);
  auto resize_op = std::make_shared<ResizeOp>(300, 300);
  std::vector<std::shared_ptr<TensorOp>> func_list2;
  func_list2.push_back(resize_op);
  std::shared_ptr<MapOp> map_resize_op =
    std::make_shared<MapOp>(input_columns, output_columns, std::move(func_list2), 5, op_connector_size);
  auto image_folder_op = ImageFolder(16, 2, 32, folder_path, false);
  image_folder_op->SetTotalRepeats(num_repeats);
  image_folder_op->SetNumRepeatsPerEpoch(num_repeats);
  map_decode_map->SetTotalRepeats(num_repeats);
  map_decode_map->SetNumRepeatsPerEpoch(num_repeats);
  my_tree_ = Build({image_folder_op, map_decode_map, repeat_op, map_resize_op});
  rc = my_tree_->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree_->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree_);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  int32_t img_class[] = {0, 1, 2, 3};
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    EXPECT_TRUE(img_class[(i % 44) / 11] == label);
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    i++;
  }
  // 44 images x 2 repeats.
  EXPECT_TRUE(i == 88);
}

View File

@ -62,131 +62,6 @@ std::shared_ptr<MindRecordOp> CreateMindRecord(int32_t mind_record_workers, bool
return std::move(op);
}
TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) {
  // single MindRecord op and nothing else
  //
  //    MindRecordOp
  MS_LOG(INFO) << "UT test TestMindRecordBasic";
  Status rc;
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  // Test info:
  // Dataset from testDataset1 has 10 rows, 2 columns.
  // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row
  // only. 2 workers.
  // Test a column selection instead of all columns as well.
  std::vector<std::string> column_list;
  std::string label_col_name("file_name");
  column_list.push_back(label_col_name);
  label_col_name = "label";
  column_list.push_back(label_col_name);
  std::shared_ptr<MindRecordOp> my_mindrecord_op = CreateMindRecord(
    4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, column_list, {});
  MS_LOG(DEBUG) << (*my_mindrecord_op);
  // Bug fix: the tree-building and launch Statuses were silently discarded;
  // check every one of them so setup failures surface at the right line.
  rc = my_tree->AssociateNode(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  // Set children/root layout.
  rc = my_tree->AssignRoot(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = my_tree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count;
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl;
      MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str());
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
}
TEST_F(MindDataTestMindRecordOp, TestMindRecordSample) {
  // single MindRecord op and nothing else
  //
  //    MindRecordOp
  MS_LOG(INFO) << "UT test TestMindRecordSample";
  Status rc;
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  // Test info:
  // Dataset from testDataset1 has 10 rows, 2 columns.
  // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row
  // only. 2 workers.
  // Test a column selection instead of all columns as well.
  std::vector<std::string> column_list;
  std::string label_col_name("file_name");
  column_list.push_back(label_col_name);
  label_col_name = "label";
  column_list.push_back(label_col_name);
  // Shard operator: sample down to 4 rows.
  std::vector<std::shared_ptr<mindspore::mindrecord::ShardOperator>> operators;
  operators.push_back(std::make_shared<mindspore::mindrecord::ShardSample>(4));
  std::shared_ptr<MindRecordOp> my_mindrecord_op =
    CreateMindRecord(4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"},
                     column_list, operators);
  MS_LOG(DEBUG) << (*my_mindrecord_op);
  // Bug fix: the tree-building and launch Statuses were silently discarded;
  // check every one of them so setup failures surface at the right line.
  rc = my_tree->AssociateNode(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  // Set children/root layout.
  rc = my_tree->AssignRoot(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = my_tree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count;
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl;
      MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str());
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
}
TEST_F(MindDataTestMindRecordOp, TestMindRecordShuffle) {
// single MindRecord op and nothing else
//
@ -319,148 +194,3 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordCategory) {
row_count++;
}
}
TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
  // single MindRecord op and nothing else
  //
  //    MindRecordOp
  MS_LOG(INFO) << "UT test TestMindRecordRepeat";
  Status rc;
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  // Test info:
  // Dataset from testDataset1 has 10 rows, 2 columns.
  // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row
  // only. 2 workers.
  // Test a column selection instead of all columns as well.
  std::vector<std::string> column_list;
  std::string label_col_name("file_name");
  column_list.push_back(label_col_name);
  label_col_name = "label";
  column_list.push_back(label_col_name);
  std::shared_ptr<MindRecordOp> my_mindrecord_op = CreateMindRecord(
    4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, column_list, {});
  MS_LOG(DEBUG) << (*my_mindrecord_op);
  rc = my_tree->AssociateNode(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  // Wrap the reader in a RepeatOp so every row is produced twice.
  uint32_t num_repeats = 2;
  std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree->AssociateNode(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  my_mindrecord_op->SetTotalRepeats(num_repeats);
  my_mindrecord_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = my_repeat_op->AddChild(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  // Set children/root layout.
  rc = my_tree->AssignRoot(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  // Bug fix: Prepare()/Launch() Statuses were previously ignored; check them.
  rc = my_tree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count;
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl;
      MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str());
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
}
TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
  // single MindRecord op and nothing else
  //
  //    MindRecordOp
  MS_LOG(INFO) << "UT test TestMindRecordBlockReaderRepeat";
  Status rc;
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  // Test info:
  // Dataset from testDataset1 has 10 rows, 2 columns.
  // RowsPerBuffer buffer setting of 3 yields 4 buffers with the last buffer having single row
  // only. 2 workers.
  // Test a column selection instead of all columns as well.
  std::vector<std::string> column_list;
  std::string label_col_name("file_name");
  column_list.push_back(label_col_name);
  label_col_name = "label";
  column_list.push_back(label_col_name);
  std::shared_ptr<MindRecordOp> my_mindrecord_op = CreateMindRecord(
    4, true, {mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}, column_list, {});
  MS_LOG(DEBUG) << (*my_mindrecord_op);
  rc = my_tree->AssociateNode(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  // Wrap the reader in a RepeatOp so every row is produced twice.
  uint32_t num_repeats = 2;
  std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree->AssociateNode(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  my_mindrecord_op->SetTotalRepeats(num_repeats);
  my_mindrecord_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = my_repeat_op->AddChild(my_mindrecord_op);
  EXPECT_TRUE(rc.IsOk());
  // Set children/root layout.
  rc = my_tree->AssignRoot(my_repeat_op);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  // Bug fix: Prepare()/Launch() Statuses were previously ignored; check them.
  rc = my_tree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count;
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << (*tensor_list[i]) << std::endl;
      MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str());
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
}

View File

@ -1,417 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/core/client.h"
#include "common/common.h"
#include "gtest/gtest.h"
#include <memory>
#include <vector>
#include <iostream>
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/random_data_op.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/util/random.h"
using namespace mindspore::dataset;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
// Fixture for the RandomDataOp test cases below; inherits shared test helpers
// (e.g. datasets_root_path_, used by the schema-loading tests) from
// UT::DatasetOpTesting.
class MindDataTestRandomDataOp : public UT::DatasetOpTesting {};
// Test info:
// - Simple test with a user-provided schema generated purely from DataSchema C API
// - has an interaction loop
//
// Tree: single node tree with RandomDataOp
//
//     RandomDataOp
//
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
  Status rc;
  int32_t rank = 0;  // not used
  MS_LOG(INFO) << "UT test RandomDataOpBasic1";
  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();
  // Create a schema using the C api's
  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
  // Most other ops cannot do that as they are limited by the physical data itself. We're
  // more flexible with random data since it is just making stuff up on the fly.
  TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
  ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible,
                   rank,  // not used
                   &c1Shape);
  // Column 2 will just be a scalar label number
  TensorShape c2Shape({});  // empty shape is a 1-value scalar Tensor
  ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape);
  testSchema->AddColumn(c1);
  testSchema->AddColumn(c2);
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto op_connector_size = cfg->op_connector_size();
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(1, op_connector_size, 25, std::move(testSchema));
  rc = myTree->AssociateNode(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());
  rc = myTree->AssignRoot(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());
  std::ostringstream ss;
  ss << *myRandomDataOp;
  // Bug fix: the log message mixed a printf-style "%s" into stream-style
  // logging, so the literal "%s" was printed before the op dump.
  MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = myTree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  EXPECT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    // Don't display these rows...too big to show
    MS_LOG(INFO) << "Row fetched #: " << rowCount;
    rc = dI.FetchNextTensorRow(&tensorList);
    EXPECT_TRUE(rc.IsOk());
    rowCount++;
  }
  // The op was configured for 25 rows.
  ASSERT_EQ(rowCount, 25);
}
// Test info:
// - Simple test with a randomly generated schema
// - no iteration loop on this one, just create the op
//
// Tree: single node tree with RandomDataOp
//
//     RandomDataOp
//
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
  Status rc;
  MS_LOG(INFO) << "UT test RandomDataOpBasic2";
  // One-node tree: a RandomDataOp constructed with no schema (nullptr).
  auto tree = std::make_shared<ExecutionTree>();
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto queue_size = cfg->op_connector_size();
  std::shared_ptr<RandomDataOp> random_op = std::make_shared<RandomDataOp>(1, queue_size, 0, nullptr);
  rc = tree->AssociateNode(random_op);
  EXPECT_TRUE(rc.IsOk());
  rc = tree->AssignRoot(random_op);
  EXPECT_TRUE(rc.IsOk());
  // Only exercise the printer here; the tree is never launched.
  std::ostringstream oss;
  oss << *random_op;
  MS_LOG(INFO) << "RandomDataOp print: " << oss.str();
}
// Test info:
// - json file test with iteration
//
// Tree: single node tree with RandomDataOp
//
//     RandomDataOp
//
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
  Status rc;
  MS_LOG(INFO) << "UT test RandomDataOpBasic3";
  // One-node tree whose schema is loaded from a json file on disk.
  auto tree = std::make_shared<ExecutionTree>();
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  rc = schema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto queue_size = cfg->op_connector_size();
  std::shared_ptr<RandomDataOp> random_op =
    std::make_shared<RandomDataOp>(1, queue_size, 10, std::move(schema));
  rc = tree->AssociateNode(random_op);
  EXPECT_TRUE(rc.IsOk());
  rc = tree->AssignRoot(random_op);
  EXPECT_TRUE(rc.IsOk());
  std::ostringstream oss;
  oss << *random_op;
  MS_LOG(INFO) << "RandomDataOp print: " << oss.str();
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = tree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = tree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Drain the pipeline; the op was configured for 10 rows.
  DatasetIterator di(tree);
  TensorRow row;
  rc = di.FetchNextTensorRow(&row);
  EXPECT_TRUE(rc.IsOk());
  int rows_seen = 0;
  while (!row.empty()) {
    // Don't display these rows...too big to show
    MS_LOG(INFO) << "Row fetched #: " << rows_seen;
    rc = di.FetchNextTensorRow(&row);
    EXPECT_TRUE(rc.IsOk());
    rows_seen++;
  }
  ASSERT_EQ(rows_seen, 10);
}
// Test info:
// - json schema input it's a fairly simple one
// - has an interaction loop
//
// Tree: RepeatOp over RandomDataOp
//
//     RepeatOp
//        |
//     RandomDataOp
//
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
  Status rc;
  MS_LOG(INFO) << "UT test RandomDataOpBasic4";
  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();
  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto op_connector_size = cfg->op_connector_size();
  // 10 rows per epoch, repeated twice below => 20 rows total.
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema));
  rc = myTree->AssociateNode(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());
  uint32_t numRepeats = 2;
  std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  rc = myTree->AssociateNode(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());
  myRandomDataOp->SetTotalRepeats(numRepeats);
  myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
  rc = myRepeatOp->AddChild(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());
  rc = myTree->AssignRoot(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = myTree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  EXPECT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << rowCount;
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensorList.size(); i++) {
      std::ostringstream ss;
      ss << *tensorList[i] << std::endl;
      // Bug fix: a printf-style "%s" was left inside this stream-style log,
      // printing a literal "%s" before every tensor dump.
      MS_LOG(INFO) << "Tensor print: " << ss.str();
    }
    rc = dI.FetchNextTensorRow(&tensorList);
    EXPECT_TRUE(rc.IsOk());
    rowCount++;
  }
  ASSERT_EQ(rowCount, 20);
}
// Test info:
// - json schema input it's a fairly simple one
// - has an interaction loop
// - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
//
// Tree: RepeatOp over RandomDataOp
//
//     RepeatOp
//        |
//     RandomDataOp
//
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
  Status rc;
  MS_LOG(INFO) << "UT test RandomDataOpBasic5";
  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();
  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto op_connector_size = cfg->op_connector_size();
  // 4 parallel workers; 10 rows per epoch, repeated 3x below => 30 rows total.
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema));
  rc = myTree->AssociateNode(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());
  uint32_t numRepeats = 3;
  std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  rc = myTree->AssociateNode(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());
  myRandomDataOp->SetTotalRepeats(numRepeats);
  myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
  rc = myRepeatOp->AddChild(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());
  rc = myTree->AssignRoot(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = myTree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  EXPECT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << rowCount;
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensorList.size(); i++) {
      std::ostringstream ss;
      ss << *tensorList[i] << std::endl;
      // Bug fix: the original used a comma operator
      // (MS_LOG(INFO) << "Tensor print: ", ss.str();) which discarded the
      // tensor text entirely; stream it properly instead.
      MS_LOG(INFO) << "Tensor print: " << ss.str();
    }
    rc = dI.FetchNextTensorRow(&tensorList);
    EXPECT_TRUE(rc.IsOk());
    rowCount++;
  }
  ASSERT_EQ(rowCount, 30);
}
// Test info:
// - repeat shuffle random
//
// Tree: RepeatOp over RandomDataOp
//
// RepeatOp
// |
// ShuffleOp
// |
// RandomDataOp
//
TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
  MS_LOG(INFO) << "UT test RandomDataOpTree1";
  Status rc;
  // Build an empty execution tree to populate: Repeat -> Shuffle -> RandomData.
  auto tree = std::make_shared<ExecutionTree>();
  // Read the dataset column schema from json.
  auto schema = std::make_unique<DataSchema>();
  rc = schema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto connector_size = cfg->op_connector_size();
  // Leaf: random data generator, 4 workers, 10 rows.
  auto random_op = std::make_shared<RandomDataOp>(4, connector_size, 10, std::move(schema));
  rc = tree->AssociateNode(random_op);
  EXPECT_TRUE(rc.IsOk());
  // Middle: shuffle using the current global seed.
  uint32_t shuffle_seed = GetSeed();
  auto shuffle_op = std::make_shared<ShuffleOp>(4, shuffle_seed, connector_size, true);
  rc = tree->AssociateNode(shuffle_op);
  EXPECT_TRUE(rc.IsOk());
  // Root: repeat the pipeline three times.
  uint32_t repeats = 3;
  auto repeat_op = std::make_shared<RepeatOp>(repeats);
  rc = tree->AssociateNode(repeat_op);
  EXPECT_TRUE(rc.IsOk());
  // Wire the tree together; each child op carries its repeat counts.
  shuffle_op->SetTotalRepeats(repeats);
  shuffle_op->SetNumRepeatsPerEpoch(repeats);
  rc = repeat_op->AddChild(shuffle_op);
  EXPECT_TRUE(rc.IsOk());
  random_op->SetTotalRepeats(repeats);
  random_op->SetNumRepeatsPerEpoch(repeats);
  rc = shuffle_op->AddChild(random_op);
  EXPECT_TRUE(rc.IsOk());
  rc = tree->AssignRoot(repeat_op);
  EXPECT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = tree->Prepare();
  EXPECT_TRUE(rc.IsOk());
  rc = tree->Launch();
  EXPECT_TRUE(rc.IsOk());
  // Pull rows from the pipeline until it is drained.
  DatasetIterator iter(tree);
  TensorRow row;
  rc = iter.FetchNextTensorRow(&row);
  EXPECT_TRUE(rc.IsOk());
  int num_rows = 0;
  while (!row.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << num_rows;
    // Log each tensor in the row via its stream printer.
    for (int i = 0; i < row.size(); i++) {
      std::ostringstream oss;
      oss << *row[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << oss.str();
    }
    rc = iter.FetchNextTensorRow(&row);
    EXPECT_TRUE(rc.IsOk());
    num_rows++;
  }
  // 10 rows x 3 repeats.
  ASSERT_EQ(num_rows, 30);
}

View File

@ -33,63 +33,6 @@ using mindspore::MsLogLevel::INFO;
// Fixture for TFReaderOp unit tests; UT::DatasetOpTesting supplies the shared
// test utilities used below (e.g. datasets_root_path_ for locating test data).
class MindDataTestTFReaderOp : public UT::DatasetOpTesting {};
// Reads the full TF test dataset (12 rows) through a single-worker TFReaderOp
// and verifies the row count.
TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) {
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  Status rc;
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  int32_t num_workers = 1;
  int32_t worker_connector_size = config_manager->worker_connector_size();
  std::vector<std::string> files = {dataset_path};
  std::vector<std::string> columns_to_load = {};
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  // Bugfix: the LoadSchemaFile Status was previously discarded; a bad schema
  // file would have surfaced later as a confusing downstream failure.
  rc = schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  ASSERT_TRUE(rc.IsOk());
  std::shared_ptr<TFReaderOp> my_tfreader_op =
    std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
                                 columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = my_tree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
  // The test data file contains exactly 12 rows.
  ASSERT_EQ(row_count, 12);
}
TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeRowsPerBuffer) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
@ -318,201 +261,6 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) {
ASSERT_EQ(row_count, 12);
}
// Reads the TF test dataset through RepeatOp(3) -> TFReaderOp and expects
// 12 rows x 3 repeats = 36 rows.
TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) {
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  Status rc;
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  // TFReaderOp
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  int32_t num_workers = 1;
  int32_t worker_connector_size = 16;
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  std::vector<std::string> files = {dataset_path};
  std::vector<std::string> columns_to_load = {};
  // Bugfix: check the LoadSchemaFile Status instead of discarding it.
  rc = schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  ASSERT_TRUE(rc.IsOk());
  std::shared_ptr<TFReaderOp> my_tfreader_op =
    std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
                                 columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  // RepeatOp
  uint32_t num_repeats = 3;
  std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats);
  rc = my_tree->AssociateNode(my_repeat_op);
  ASSERT_TRUE(rc.IsOk());
  // Set children/root layout; the leaf op must carry its repeat counts.
  my_tfreader_op->SetTotalRepeats(num_repeats);
  my_tfreader_op->SetNumRepeatsPerEpoch(num_repeats);
  rc = my_repeat_op->AddChild(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(my_repeat_op);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = my_tree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
  // 12 rows per epoch, repeated 3 times.
  ASSERT_EQ(row_count, 12 * 3);
}
// Loads only two columns (col_sint32, col_binary) via the schema and verifies
// every fetched row has exactly that many tensors.
TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) {
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  Status rc;
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  std::vector<std::string> files = {dataset_path + "/test.data"};
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  std::vector<std::string> columns_to_load;
  columns_to_load.push_back("col_sint32");
  columns_to_load.push_back("col_binary");
  // Bugfix: check the LoadSchemaFile Status instead of discarding it.
  rc = schema->LoadSchemaFile(dataset_path + "/datasetSchema.json", columns_to_load);
  ASSERT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  int32_t worker_connector_size = config_manager->worker_connector_size();
  int32_t num_workers = 1;
  std::shared_ptr<TFReaderOp> my_tfreader_op =
    std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
                                 columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = my_tree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    // Each row must contain exactly the requested columns.
    ASSERT_EQ(tensor_list.size(), columns_to_load.size());
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
  // All 12 rows should still be produced.
  ASSERT_EQ(row_count, 12);
}
// Uses a schema capped at one row (datasetSchema1Row.json) and verifies the
// reader stops after exactly one row.
TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Row) {
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  Status rc;
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  std::string data_schema_filepath = dataset_path + "/datasetSchema1Row.json";
  // TFReaderOp
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  // Fix: use the already-built data_schema_filepath (it was previously unused
  // and the identical path was re-concatenated inline) and check the Status.
  rc = schema->LoadSchemaFile(data_schema_filepath, {});
  ASSERT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  int32_t num_workers = 1;
  int32_t worker_connector_size = config_manager->worker_connector_size();
  std::vector<std::string> files = {dataset_path + "/test.data"};
  std::vector<std::string> columns_to_load = {};
  std::shared_ptr<TFReaderOp> my_tfreader_op =
    std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
                                 columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = my_tree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
  // The 1-row schema limits output to a single row.
  ASSERT_EQ(row_count, 1);
}
TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) {
// Start with an empty execution tree
auto my_tree = std::make_shared<ExecutionTree>();
@ -575,122 +323,6 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) {
ASSERT_EQ(row_count, 5);
}
// Uses a schema capped at seven rows (datasetSchema7Rows.json) and verifies the
// reader stops after exactly seven rows.
TEST_F(MindDataTestTFReaderOp, TestTFReaderTake7Rows) {
  // Start with an empty execution tree
  auto my_tree = std::make_shared<ExecutionTree>();
  Status rc;
  std::string dataset_path;
  dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  std::string data_schema_filepath = dataset_path + "/datasetSchema7Rows.json";
  // TFReaderOp
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  // Fix: use the already-built data_schema_filepath (it was previously unused
  // and the identical path was re-concatenated inline) and check the Status.
  rc = schema->LoadSchemaFile(data_schema_filepath, {});
  ASSERT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t op_connector_size = config_manager->op_connector_size();
  int32_t num_workers = 1;
  int32_t worker_connector_size = config_manager->worker_connector_size();
  std::vector<std::string> files = {dataset_path + "/test.data"};
  std::vector<std::string> columns_to_load = {};
  std::shared_ptr<TFReaderOp> my_tfreader_op =
    std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
                                 columns_to_load, false, 1, 0, false);
  rc = my_tfreader_op->Init();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssociateNode(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->AssignRoot(my_tfreader_op);
  ASSERT_TRUE(rc.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = my_tree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = my_tree->Launch();
  ASSERT_TRUE(rc.IsOk());
  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(my_tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_TRUE(rc.IsOk());
  int row_count = 0;
  while (!tensor_list.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
    // Display the tensor by calling the printer on it
    for (size_t i = 0; i < tensor_list.size(); i++) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }
    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_TRUE(rc.IsOk());
    row_count++;
  }
  // The 7-row schema limits output to seven rows.
  ASSERT_EQ(row_count, 7);
}
// With an empty (unloaded) schema, the TFReaderOp derives the columns from the
// data file itself; each of the 12 rows should contain all 9 columns.
TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) {
  Status status;
  // Build an empty execution tree.
  auto exec_tree = std::make_shared<ExecutionTree>();
  std::string data_file = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  int32_t connector_size = cfg->op_connector_size();
  int32_t workers = 1;
  std::vector<std::string> load_columns = {};
  std::vector<std::string> file_list = {data_file};
  int32_t worker_queue_size = cfg->worker_connector_size();
  // Deliberately do NOT load a schema file: columns are inferred from the data.
  auto empty_schema = std::make_unique<DataSchema>();
  auto reader = std::make_shared<TFReaderOp>(workers, worker_queue_size, 0, file_list, std::move(empty_schema),
                                             connector_size, load_columns, false, 1, 0, false);
  status = reader->Init();
  ASSERT_TRUE(status.IsOk());
  status = exec_tree->AssociateNode(reader);
  ASSERT_TRUE(status.IsOk());
  status = exec_tree->AssignRoot(reader);
  ASSERT_TRUE(status.IsOk());
  MS_LOG(INFO) << "Launching tree and begin iteration.";
  status = exec_tree->Prepare();
  ASSERT_TRUE(status.IsOk());
  status = exec_tree->Launch();
  ASSERT_TRUE(status.IsOk());
  // Drain every row from the pipeline, printing each tensor as we go.
  DatasetIterator iter(exec_tree);
  TensorRow row;
  status = iter.FetchNextTensorRow(&row);
  ASSERT_TRUE(status.IsOk());
  int rows_seen = 0;
  while (!row.empty()) {
    // Without a schema, every column present in the file must appear.
    ASSERT_EQ(row.size(), 9);
    for (int i = 0; i < row.size(); i++) {
      std::ostringstream oss;
      oss << "(" << row[i] << "): " << *row[i] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << oss.str() << ".";
    }
    status = iter.FetchNextTensorRow(&row);
    ASSERT_TRUE(status.IsOk());
    rows_seen++;
  }
  ASSERT_EQ(rows_seen, 12);
}
TEST_F(MindDataTestTFReaderOp, TestTotalRowsBasic) {
std::string tf_file = datasets_root_path_ + "/testTFTestAllTypes/test.data";