diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc index 993c987660e..df66b04fbc8 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc @@ -233,7 +233,7 @@ std::shared_ptr DatasetNode::SetNumWorkers(int32_t num_workers) { return shared_from_this(); } -DatasetNode::DatasetNode() : cache_(nullptr), parent_(nullptr), children_({}) { +DatasetNode::DatasetNode() : cache_(nullptr), parent_({}), children_({}) { // Fetch some default value from config manager std::shared_ptr cfg = GlobalContext::config_manager(); num_workers_ = cfg->num_parallel_workers(); @@ -294,30 +294,33 @@ void DatasetNode::PrintNode(std::ostream &out, int *level) const { // Add a node as a child, node's parent needs to be nullptr // this function will allow child to be a nullptr, in which case it will simply skip void DatasetNode::AddChild(std::shared_ptr child) { - if (child != nullptr && child->parent_ == nullptr) { + if (child != nullptr && !child->parent_.size()) { children_.push_back(child); - child->parent_ = this; + child->parent_.push_back(this); } else if (child != nullptr) { - MS_LOG(WARNING) << "DatasetNode::AddChild() Fail" + child->Name() + "'s parent isn't a nullptr."; + MS_LOG(WARNING) << "DatasetNode::AddChild() " + child->Name() + " already has a parent; it is being added as a child anyway."; + children_.push_back(child); + child->parent_.push_back(this); } } // Remove this node from its parent. Add the child of this node to its parent. 
// for now, this remove is limited to node with a single child or no child Status DatasetNode::Remove() { - CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "Cannot remove root or a node without parent."); + CHECK_FAIL_RETURN_UNEXPECTED(parent_.size() != 0, "Cannot remove root or a node without parent."); CHECK_FAIL_RETURN_UNEXPECTED(children_.size() < 2, "Cannot remove node with more than 1 child."); if (children_.empty()) { // I am a leaf node, remove me from my parent's children list - parent_->children_.erase(std::remove(parent_->children_.begin(), parent_->children_.end(), shared_from_this()), - parent_->children_.end()); // removal using "erase remove idiom" - } else { // replace my position in my parent's children list with my single child - auto itr = std::find(parent_->children_.begin(), parent_->children_.end(), shared_from_this()); - CHECK_FAIL_RETURN_UNEXPECTED(itr != parent_->children_.end(), "I am not in my parent's children list."); + parent_[0]->children_.erase( + std::remove(parent_[0]->children_.begin(), parent_[0]->children_.end(), shared_from_this()), + parent_[0]->children_.end()); // removal using "erase remove idiom" + } else { // replace my position in my parent's children list with my single child + auto itr = std::find(parent_[0]->children_.begin(), parent_[0]->children_.end(), shared_from_this()); + CHECK_FAIL_RETURN_UNEXPECTED(itr != parent_[0]->children_.end(), "I am not in my parent's children list."); children_[0]->parent_ = parent_; // set my single child's parent ptr to my parent *itr = std::move(children_[0]); // replace me in my parent's children list with my single child children_.clear(); // release my single child from my children list } - parent_ = nullptr; + parent_[0] = nullptr; return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h index 11c432228c0..eb930d90e01 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h @@ -183,6 +183,10 @@ class DatasetNode : public std::enable_shared_from_this { /// \return Child nodes const std::vector> Children() const { return children_; } + /// \brief Getter function for parent nodes + /// \return Parent nodes + const std::vector Parent() const { return parent_; } + /// \brief Establish the parent-child relationship between this node and its child. void AddChild(std::shared_ptr child); @@ -233,7 +237,7 @@ protected: std::vector> children_; - DatasetNode *parent_; + std::vector parent_; std::shared_ptr cache_; int64_t dataset_size_ = -1; int32_t num_workers_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/root_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/root_node.cc index 6bc13c2022b..46f4ee87731 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/root_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/root_node.cc @@ -52,7 +52,7 @@ Status RootNode::ValidateParams() { MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } - if (parent_ != nullptr) { + if (parent_.size() != 0) { std::string err_msg = "Internal error: root node should not have a parent"; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc index 1dd225b4e79..100e73b9124 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc @@ -96,6 +96,10 @@ Status TreeAdapter::PostPass(std::shared_ptr ir) { } Status TreeAdapter::BuildExecutionTree(std::shared_ptr ir, std::shared_ptr *op) { + // Validate the pipeline: the IR must be a tree, i.e. no node may have more than one consumer + CHECK_FAIL_RETURN_UNEXPECTED(ir->Parent().size() <= 1, + "The data 
pipeline is not a tree (i.e. one node has two consumers)"); + // Build the DatasetOp ExecutionTree from the optimized IR tree std::vector> ops = ir->Build(); RETURN_IF_NOT_OK(ir->BuildStatus()); // remove me after changing return val of Build() diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 2dee4cb28bb..b5725cf709d 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -4511,7 +4511,7 @@ class Schema: Parse the columns and add it to self. Args: - columns (Union[dict, list[dict]]): Dataset attribute information, decoded from schema file. + columns (Union[dict, list[dict], tuple[dict]]): Dataset attribute information, decoded from schema file. - list[dict], 'name' and 'type' must be in keys, 'shape' optional. @@ -4519,7 +4519,6 @@ class Schema: Raises: RuntimeError: If failed to parse columns. - RuntimeError: If unknown items in columns. RuntimeError: If column's name field is missing. RuntimeError: If column's type field is missing. 
diff --git a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc index dc40da72335..71b926c1c4c 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc @@ -415,6 +415,35 @@ TEST_F(MindDataTestPipeline, TestConcatFail4) { EXPECT_EQ(iter, nullptr); } +TEST_F(MindDataTestPipeline, TestConcatFail5) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail5."; + // This case is expected to fail because ds1 is concatenated with itself, which gives its ProjectNode two parent nodes + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds1 = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds1, nullptr); + + std::shared_ptr ds2 = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_NE(ds2, nullptr); + + // Create a Project operation on ds + ds1 = ds1->Project({"image"}); + EXPECT_NE(ds1, nullptr); + ds2 = ds2->Project({"image"}); + EXPECT_NE(ds2, nullptr); + + // Create a Concat operation on the ds + // Input dataset is the dataset itself + ds1 = ds1 + ds1 + ds2; + EXPECT_NE(ds1, nullptr); + + // Create an iterator over the result of the above dataset + std::shared_ptr iter = ds1->CreateIterator(); + // Expect failure: The data pipeline is not a tree + EXPECT_EQ(iter, nullptr); +} + TEST_F(MindDataTestPipeline, TestConcatSuccess) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess.";