forked from mindspore-Ecosystem/mindspore
Tested with new test cases and all dataset UTs passed
This commit is contained in:
parent
a575af7198
commit
e28fb6ce4d
|
@ -35,6 +35,7 @@ ConcatNode::ConcatNode(const std::vector<std::shared_ptr<DatasetNode>> &datasets
|
|||
: sampler_(sampler),
|
||||
children_flag_and_nums_(children_flag_and_nums),
|
||||
children_start_end_index_(children_start_end_index) {
|
||||
nary_op_ = true;
|
||||
for (auto const &child : datasets) AddChild(child);
|
||||
}
|
||||
|
||||
|
|
|
@ -221,15 +221,20 @@ std::shared_ptr<DatasetNode> DatasetNode::SetNumWorkers(int32_t num_workers) {
|
|||
return shared_from_this();
|
||||
}
|
||||
|
||||
DatasetNode::DatasetNode() : cache_(nullptr), parent_(nullptr), children_({}), dataset_size_(-1) {
|
||||
// Default constructor.
// The node starts with no cache, no parent, no children and an unknown dataset size (-1). It is flagged as
// "not a data source" and "not n-ary", i.e. IsUnaryOperator() holds until a derived class changes the flags.
// The remaining execution defaults are read from the global configuration manager.
DatasetNode::DatasetNode()
    : cache_(nullptr),
      parent_(nullptr),
      children_({}),
      dataset_size_(-1),
      mappable_(kNotADataSource),
      nary_op_(false),
      descendant_of_cache_(false) {
  // Fetch some default value from config manager
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  num_workers_ = cfg->num_parallel_workers();
  rows_per_buffer_ = cfg->rows_per_buffer();
  connector_que_size_ = cfg->op_connector_size();
  worker_connector_size_ = cfg->worker_connector_size();
  // mappable_ and descendant_of_cache_ are already set in the initializer list; no re-assignment needed here.
}
|
||||
|
||||
std::string DatasetNode::PrintColumns(const std::vector<std::string> &columns) const {
|
||||
|
@ -283,95 +288,268 @@ void DatasetNode::AddChild(std::shared_ptr<DatasetNode> child) {
|
|||
}
|
||||
}
|
||||
|
||||
// Add the input node to be the next child of this node
|
||||
// This function is used in doing a deep copy of the IR tree built by parsing the user code.
|
||||
// This function assumes we walk the tree in DFS left-to-right.
|
||||
// This is a temporary function to be replaced later by a set of better tree operations.
|
||||
void DatasetNode::AppendChild(std::shared_ptr<DatasetNode> child) {
|
||||
if (child != nullptr) {
|
||||
if (child->parent_ != nullptr) {
|
||||
MS_LOG(WARNING) << "Adding " + child->Name() + " to " + Name() + " but it already has a parent";
|
||||
}
|
||||
children_.push_back(child);
|
||||
child->parent_ = this;
|
||||
}
|
||||
/*
|
||||
* AppendChild(<node>) appending <node> as the last child of this node. The new node must have no parent.
|
||||
*
|
||||
* Input tree:
|
||||
* ds4
|
||||
* / \
|
||||
* ds3 ds2
|
||||
* |
|
||||
* ds1
|
||||
*
|
||||
* ds4->AppendChild(ds6) yields this tree
|
||||
*
|
||||
* _ ds4 _
|
||||
* / | \
|
||||
* ds3 ds2 ds6
|
||||
* |
|
||||
* ds1
|
||||
*
|
||||
*/
|
||||
// Attach <child> as the last child of this node.
// Returns an error status when <child> is null, when <child> already has a parent, or when this node is
// neither an n-ary operator nor a unary operator that currently has no child.
Status DatasetNode::AppendChild(std::shared_ptr<DatasetNode> child) {
  CHECK_FAIL_RETURN_UNEXPECTED(child != nullptr, "Node to append must not be a null pointer.");
  CHECK_FAIL_RETURN_UNEXPECTED(child->parent_ == nullptr, "Node to append must have no parent.");

  // An n-ary operator always accepts another child; a unary operator only while it has none.
  const bool can_take_child = IsNaryOperator() || (IsUnaryOperator() && Children().empty());
  CHECK_FAIL_RETURN_UNEXPECTED(can_take_child,
                               "This node must be a unary operator with no child or an n-ary operator");

  // Link both directions: parent -> child, then child -> parent.
  children_.push_back(child);
  child->parent_ = this;
  return Status::OK();
}
|
||||
|
||||
// Add a node as a parent, node's parent needs to be empty (future use)
|
||||
Status DatasetNode::InsertAbove(std::shared_ptr<DatasetNode> node) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Inserted node must not be a null pointer.");
|
||||
/*
|
||||
* InsertChildAt(<pos>, <node>) inserts the <node> to be at the <pos> index of the vector of its child nodes.
|
||||
* As in the convention of C++, <pos> starts at position 0.
|
||||
* If <pos> is a negative number or larger than the size of the vector, an error is raised (<pos> == size appends).
|
||||
*/
|
||||
// Insert <child> so that it becomes the child at index <pos> of this node.
// \param pos   Zero-based insertion index; must satisfy 0 <= pos <= number of current children.
// \param child Node to insert; must be non-null and must not have a parent yet.
// \return Status::OK() on success, an "unexpected" error status when any precondition is violated.
Status DatasetNode::InsertChildAt(int32_t pos, std::shared_ptr<DatasetNode> child) {
  // Rule out negative values first, then compare explicitly in the unsigned domain so that the signed position
  // is never implicitly converted when it is compared against the container size.
  CHECK_FAIL_RETURN_UNEXPECTED(pos >= 0 && static_cast<size_t>(pos) <= children_.size(),
                               "Position must in the range of [0, size]");
  CHECK_FAIL_RETURN_UNEXPECTED(child != nullptr, "Node to insert must not be a null pointer.");
  CHECK_FAIL_RETURN_UNEXPECTED(child->parent_ == nullptr, "Node to insert must have no parent.");
  // A unary operator can only take a child while it has none; an n-ary operator can always take one more.
  CHECK_FAIL_RETURN_UNEXPECTED((IsUnaryOperator() && Children().empty()) || IsNaryOperator(),
                               "This node must be a unary operator with no child or an n-ary operator");
  children_.insert(children_.begin() + pos, child);
  child->parent_ = this;
  return Status::OK();
}
|
||||
|
||||
if (node->parent_ != nullptr) {
|
||||
DatasetNode *parent = node->parent_;
|
||||
for (auto i = parent->children_.size() - 1; i >= 0; --i) {
|
||||
if (parent->children_[i] == node) {
|
||||
parent->children_[i] = static_cast<std::shared_ptr<DatasetNode>>(this);
|
||||
/*
|
||||
* Insert the input <node> above this node
|
||||
* Input tree:
|
||||
* ds4
|
||||
* / \
|
||||
* ds3 ds2
|
||||
* |
|
||||
* ds1
|
||||
*
|
||||
* Case 1: If we want to insert a new node ds5 between ds4 and ds3, use
|
||||
* ds3->InsertAbove(ds5)
|
||||
*
|
||||
* ds4
|
||||
* / \
|
||||
* ds5 ds2
|
||||
* |
|
||||
* ds3
|
||||
* |
|
||||
* ds1
|
||||
*
|
||||
* Case 2: Likewise, ds2->InsertAbove(ds6) yields
|
||||
*
|
||||
* ds4
|
||||
* / \
|
||||
* ds3 ds6
|
||||
* | |
|
||||
* ds1 ds2
|
||||
*
|
||||
* Case 3: We can insert a new node between ds3 and ds1 by ds1->InsertAbove(ds7)
|
||||
*
|
||||
* ds4
|
||||
* / \
|
||||
* ds3 ds2
|
||||
* |
|
||||
* ds7
|
||||
* |
|
||||
* ds1
|
||||
*
|
||||
* InsertAbove() cannot use on the root node of a tree.
|
||||
*/
|
||||
// Insert <node> between this node and its current parent.
// \param node Node to insert; must be non-null and must not have a parent.
// \return Status::OK() on success; an "unexpected" error status when <node> is null or already has a parent,
//         when this node has no parent, or when this node is not found in its parent's children list.
Status DatasetNode::InsertAbove(std::shared_ptr<DatasetNode> node) {
  CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Node to insert must not be a null pointer.");
  CHECK_FAIL_RETURN_UNEXPECTED(node->parent_ == nullptr, "Node to insert must have no parent.");
  CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "This node must not be the root or a node without parent.");
  auto parent = parent_;

  // Hold a reference to this node for the whole operation: the slot in parent->children_ is overwritten below and
  // it may be the last shared_ptr that owns this node.
  auto self = shared_from_this();

  // The following fields of these three nodes are changed in this function:
  // 1. parent->children_
  // 2. node->parent_ and node->children_
  // 3. this->parent_
  auto current_node_itr = std::find(parent->children_.begin(), parent->children_.end(), self);
  // Never dereference end(): bail out if the parent/child links are inconsistent.
  CHECK_FAIL_RETURN_UNEXPECTED(current_node_itr != parent->children_.end(),
                               "This node is not in its parent's children list.");
  *current_node_itr = node;
  node->parent_ = parent;
  node->children_.push_back(self);
  parent_ = node.get();

  return Status::OK();
}
|
||||
|
||||
/*
|
||||
* Drop() detaches this node from the tree it is in. Calling Drop() from a standalone node is a no-op.
|
||||
*
|
||||
* Input tree:
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / | \
|
||||
* ds8 ds5 ds4 ds1
|
||||
* | / \
|
||||
* ds7 ds3 ds2
|
||||
*
|
||||
* Case 1: When the node has no child and no sibling, Drop() detaches the node from its tree.
|
||||
*
|
||||
* ds7->Drop() yields the tree below:
|
||||
*
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / | \
|
||||
* ds8 ds5 ds4 ds1
|
||||
* / \
|
||||
* ds3 ds2
|
||||
*
|
||||
* Case 2: When the node has one child and no sibling, Drop() detaches the node from its tree and the node's child
|
||||
* becomes its parent's child.
|
||||
*
|
||||
* ds8->Drop() yields the tree below:
|
||||
*
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / | \
|
||||
* ds7 ds5 ds4 ds1
|
||||
* / \
|
||||
* ds3 ds2
|
||||
*
|
||||
* Case 3: When the node has more than one child and no sibling, Drop() detaches the node from its tree and the node's
|
||||
* children become its parent's children.
|
||||
*
|
||||
* When the input tree is
|
||||
*
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | |
|
||||
* ds8 ds4
|
||||
* | / \
|
||||
* ds7 ds3 ds2
|
||||
*
|
||||
* ds4->Drop() yields the tree below:
|
||||
*
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / \
|
||||
* ds8 ds3 ds2
|
||||
* |
|
||||
* ds7
|
||||
*
|
||||
* But if ds6 is not an n-ary operator, ds4->Drop() will raise an error because we cannot add the children of an
|
||||
* n-ary operator (ds4) to a unary operator (ds6).
|
||||
*
|
||||
* Case 4: When the node has no child but has siblings, Drop() detaches the node from its tree and its siblings will be
|
||||
* squeezed left.
|
||||
*
|
||||
* Input tree:
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / | \
|
||||
* ds8 ds5 ds4 ds1
|
||||
* | / \
|
||||
* ds7 ds3 ds2
|
||||
*
|
||||
* ds5->Drop() yields the tree below:
|
||||
*
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / \
|
||||
* ds8 ds4 ds1
|
||||
* | / \
|
||||
* ds7 ds3 ds2
|
||||
*
|
||||
* Case 5: When the node has more than one child and more than one sibling, Drop() will raise an error.
|
||||
* If we want to drop ds4 from the input tree, ds4->Drop() will not work. We will have to do it
|
||||
* with a combination of Drop(), InsertChildAt()
|
||||
*
|
||||
* Input tree:
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6
|
||||
* | / | \
|
||||
* ds8 ds5 ds4 ds1
|
||||
* | / \
|
||||
* ds7 ds3 ds2
|
||||
*
|
||||
* If we want to form this tree below:
|
||||
*
|
||||
* ds10
|
||||
* / \
|
||||
* ds9 ds6_____
|
||||
* | / | | \
|
||||
* ds8 ds5 ds3 ds2 ds1
|
||||
* |
|
||||
* ds7
|
||||
*
|
||||
*/
|
||||
// Detach this node from the tree it is in and leave it as an orphan without parent and children.
// Depending on the shape around the node, its children (if any) are handed over to its parent.
// \return Status::OK() on success; an "unexpected" error status when
//         - this node has no parent,
//         - this node is an n-ary operator whose parent is a unary operator,
//         - this node has children and is not its parent's only child.
Status DatasetNode::Drop() {
  CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "This node to drop must not be the root or a node without parent.");
  CHECK_FAIL_RETURN_UNEXPECTED(!(IsNaryOperator() && parent_->IsUnaryOperator()),
                               "Trying to drop an n-ary operator that is a child of a unary operator");
  CHECK_FAIL_RETURN_UNEXPECTED(!(children_.size() > 1 && parent_->children_.size() > 1),
                               "This node to drop must not have more than one child and more than one sibling.");
  CHECK_FAIL_RETURN_UNEXPECTED(children_.size() == 0 || parent_->children_.size() == 1,
                               "If this node to drop has children, it must be its parent's only child.");

  // Keep this node alive until the function returns: the entry in the parent's children list is overwritten or
  // removed below and it may be the last shared_ptr owning this node, while parent_ and children_ are still
  // written afterwards.
  auto self = shared_from_this();
  auto parent = parent_;

  if (parent->children_.size() == 1) {
    if (children_.size() == 1) {
      // Case 2: When the node has one child and no sibling, Drop() detaches the node from its tree and the node's
      //         child becomes its parent's child.
      // This is the most common use case.
      auto child = children_[0];
      // Move its child to be its parent's child
      parent->children_[0] = child;
      child->parent_ = parent;
    } else if (children_.empty()) {
      // Case 1: When the node has no child and no sibling, Drop() detaches the node from its tree.
      // Remove this node from its parent's child
      parent->children_.clear();
    } else if (children_.size() > 1) {
      // Case 3: When the node has more than one child and no sibling, Drop() detaches the node from its tree and
      //         the node's children become its parent's children.
      // Remove this node from its parent's child
      parent->children_.clear();
      // Move its child to be its parent's child
      for (auto &child : children_) {
        parent->children_.push_back(child);
        child->parent_ = parent;
      }
    }
  } else if (children_.empty() && parent->children_.size() > 1) {
    // Case 4: When the node has no child but has siblings, Drop() detaches the node from its tree and its siblings
    //         will be squeezed left.
    // Remove this node from its parent's child
    parent->children_.erase(std::remove(parent->children_.begin(), parent->children_.end(), self),
                            parent->children_.end());  // removal using "erase remove idiom"
  } else {
    RETURN_STATUS_UNEXPECTED("Internal error: we should not reach here.");
  }

  // And mark itself as an orphan
  parent_ = nullptr;
  children_.clear();

  return Status::OK();
}
|
||||
|
||||
// Insert a node as a child of this node
|
||||
// This node's children become the children of the inserted node.
|
||||
// Put <node> directly underneath this node: every current child of this node is re-parented to <node>, and
// <node> becomes the single child of this node.
// Returns an error status when <node> is null, already has children, or already has a parent.
Status DatasetNode::InsertBelow(std::shared_ptr<DatasetNode> node) {
  CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Inserted node must not be a null pointer.");
  CHECK_FAIL_RETURN_UNEXPECTED(node->children_.empty(), "Inserted node must not have any children.");
  CHECK_FAIL_RETURN_UNEXPECTED(node->parent_ == nullptr, "Inserted node must not have a parent.");

  // Hand every existing child over to the inserted node, preserving their order.
  DatasetNode *const new_parent = node.get();
  for (size_t idx = 0; idx < children_.size(); ++idx) {
    const std::shared_ptr<DatasetNode> &grandchild = children_[idx];
    new_parent->children_.push_back(grandchild);
    grandchild->parent_ = new_parent;
  }

  // The inserted node is now the one and only child of this node.
  children_.clear();
  children_.push_back(node);
  node->parent_ = this;
  return Status::OK();
}
|
||||
|
||||
// Insert a node as a child next to this node (future use)
|
||||
// Insert <node> as the sibling immediately to the right of this node (future use).
// \param node Node to insert; must be non-null and must not have a parent.
// \return Status::OK() on success; an "unexpected" error status when <node> is null or already has a parent,
//         when this node has no parent, or when this node is not found in its parent's children list.
//         The parent's children list is left untouched on every error path.
Status DatasetNode::InsertAfter(std::shared_ptr<DatasetNode> node) {
  CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Inserted node must not be a null pointer.");
  CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "This node must have a parent.");
  CHECK_FAIL_RETURN_UNEXPECTED(node->parent_ == nullptr, "Inserted node must not have a parent.");

  // Locate this node among its siblings by raw pointer identity.
  auto &siblings = parent_->children_;
  auto self_itr = std::find_if(siblings.begin(), siblings.end(),
                               [this](const std::shared_ptr<DatasetNode> &sibling) { return sibling.get() == this; });
  CHECK_FAIL_RETURN_UNEXPECTED(self_itr != siblings.end(), "Insertion point not found.");

  // vector::insert places the element before the given position, so insert before the slot after this node;
  // the siblings to the right are shifted by the container itself.
  siblings.insert(self_itr + 1, node);
  node->parent_ = parent_;
  return Status::OK();
}
|
||||
|
||||
// Remove this node from its parent. Add the child of this node to its parent.
|
||||
// for now, this remove is limited to node with a single child or no child
|
||||
// Remove this node from its parent. Add the child of this node to its parent.
// For now, this remove is limited to a node with a single child or no child.
// \return Status::OK() on success; an "unexpected" error status when this node has no parent, has more than one
//         child, or (when it has one child) is not found in its parent's children list.
Status DatasetNode::Remove() {
  CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "Cannot remove root or a node without parent.");
  CHECK_FAIL_RETURN_UNEXPECTED(children_.size() < 2, "Cannot remove node with more than 1 child.");

  // Keep this node alive until the function returns: its entry in the parent's children list is erased or
  // overwritten below and may be the last shared_ptr owning it, while parent_ and children_ are still written.
  auto self = shared_from_this();

  if (children_.empty()) {  // I am a leaf node, remove me from my parent's children list
    parent_->children_.erase(std::remove(parent_->children_.begin(), parent_->children_.end(), self),
                             parent_->children_.end());  // removal using "erase remove idiom"
  } else {  // replace my position in my parent's children list with my single child
    auto itr = std::find(parent_->children_.begin(), parent_->children_.end(), self);
    CHECK_FAIL_RETURN_UNEXPECTED(itr != parent_->children_.end(), "I am not in my parent's children list.");
    children_[0]->parent_ = parent_;  // set my single child's parent ptr to my parent
    *itr = std::move(children_[0]);   // replace me in my parent's children list with my single child
    children_.clear();                // release my single child from my children list
  }
  parent_ = nullptr;
  return Status::OK();
}
|
||||
|
||||
|
|
|
@ -140,7 +140,7 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
|
|||
/// \param out - The output stream to write output to
|
||||
virtual void Print(std::ostream &out) const = 0;
|
||||
|
||||
/// \brief Pure virtual function to make a new copy of the node
|
||||
/// \brief Pure virtual function to clone a new copy of the node
|
||||
/// \return The new copy of the node
|
||||
virtual std::shared_ptr<DatasetNode> Copy() = 0;
|
||||
|
||||
|
@ -187,26 +187,19 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
|
|||
/// \return The parent node (of a node from a cloned IR tree)
|
||||
DatasetNode *const Parent() const { return parent_; }
|
||||
|
||||
/// \brief Establish a parent-child relationship between this node and the input node.
|
||||
/// Used when building the IR tree.
|
||||
void AddChild(std::shared_ptr<DatasetNode> child);
|
||||
|
||||
/// \brief Establish a parent-child relationship between this node and the input node.
|
||||
/// Used during the cloning of the user-input IR tree (temporary use)
|
||||
void AppendChild(std::shared_ptr<DatasetNode> child);
|
||||
Status AppendChild(std::shared_ptr<DatasetNode> child);
|
||||
|
||||
/// \brief Establish the child-parent relationship between this node and the input node (future use)
|
||||
/// \brief Insert the input <node> above this node
|
||||
Status InsertAbove(std::shared_ptr<DatasetNode> node);
|
||||
|
||||
/// \brief Insert the input node below this node. This node's children becomes the children of the inserted node.
|
||||
Status InsertBelow(std::shared_ptr<DatasetNode> node);
|
||||
|
||||
/// \brief Add the input node as the next sibling (future use)
|
||||
Status InsertAfter(std::shared_ptr<DatasetNode> node);
|
||||
Status InsertChildAt(int32_t pos, std::shared_ptr<DatasetNode> node);
|
||||
|
||||
/// \brief detach this node from its parent, add its child (if any) to its parent
|
||||
/// \return error code, return error if node has more than 1 children
|
||||
Status Remove();
|
||||
Status Drop();
|
||||
|
||||
/// \brief Check if this node has cache
|
||||
/// \return True if the data of this node will be cached
|
||||
|
@ -216,13 +209,25 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
|
|||
/// \return True if this is a leaf node.
|
||||
const bool IsLeaf() const { return children_.empty(); }
|
||||
|
||||
/// \brief Check if this node is a unary operator node.
|
||||
/// \return True if this node is semantically a unary operator node
|
||||
const bool IsUnaryOperator() const { return (mappable_ == kNotADataSource && !nary_op_); }
|
||||
|
||||
/// \brief Check if this node is a n-ary operator node.
|
||||
/// \return True if this node is semantically a n-ary operator node
|
||||
const bool IsNaryOperator() const { return (mappable_ == kNotADataSource && nary_op_); }
|
||||
|
||||
/// \brief Check if this node is a mappable dataset. Only applicable to leaf nodes
|
||||
/// \return True if this node is a mappable dataset
|
||||
const bool IsMappable() const { return (mappable_ == kMappableSource); }
|
||||
const bool IsMappableDataSource() const { return (mappable_ == kMappableSource); }
|
||||
|
||||
/// \brief Check if this node is a non-mappable dataset. Only applicable to leaf nodes
|
||||
/// \return True if this node is a non-mappable dataset
|
||||
const bool IsNonMappable() const { return (mappable_ == kNonMappableSource); }
|
||||
const bool IsNonMappableDataSource() const { return (mappable_ == kNonMappableSource); }
|
||||
|
||||
/// \brief Check if this node is a data source node.
|
||||
/// \return True if this node is a data source node
|
||||
const bool IsDataSource() const { return (mappable_ == kMappableSource || mappable_ == kNonMappableSource); }
|
||||
|
||||
/// \brief Check if this node is not a data source node.
|
||||
/// \return True if this node is not a data source node
|
||||
|
@ -285,11 +290,15 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
|
|||
int32_t rows_per_buffer_;
|
||||
int32_t connector_que_size_;
|
||||
int32_t worker_connector_size_;
|
||||
// Establish a parent-child relationship between this node and the input node.
|
||||
// Used only in the constructor of the class and its derived classes.
|
||||
void AddChild(std::shared_ptr<DatasetNode> child);
|
||||
std::string PrintColumns(const std::vector<std::string> &columns) const;
|
||||
Status AddCacheOp(std::vector<std::shared_ptr<DatasetOp>> *node_ops);
|
||||
void PrintNode(std::ostream &out, int *level) const;
|
||||
enum DataSource { kNotADataSource = 0, kNonMappableSource = 1, kMappableSource = 2 };
|
||||
enum DataSource mappable_;
|
||||
bool nary_op_; // an indicator of whether the current node supports multiple children, true for concat/zip node
|
||||
bool descendant_of_cache_;
|
||||
};
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ namespace mindspore {
|
|||
namespace dataset {
|
||||
|
||||
// Build a zip node on top of the given datasets.
// The node is flagged as an n-ary operator, then each input dataset is attached as a child in the given order.
ZipNode::ZipNode(const std::vector<std::shared_ptr<DatasetNode>> &datasets) {
  nary_op_ = true;
  for (const auto &dataset : datasets) {
    AddChild(dataset);
  }
}
|
||||
|
||||
|
|
|
@ -135,7 +135,7 @@ Status CacheValidationPass::Visit(std::shared_ptr<DatasetNode> node, bool *const
|
|||
// If this node is created to be cached, set the flag.
|
||||
is_cached_ = true;
|
||||
}
|
||||
if (node->IsLeaf() && node->IsMappable()) {
|
||||
if (node->IsLeaf() && node->IsMappableDataSource()) {
|
||||
is_mappable_ = true;
|
||||
}
|
||||
return Status::OK();
|
||||
|
|
|
@ -52,7 +52,7 @@ Status DeepCopyPass::Visit(std::shared_ptr<DatasetNode> node, bool *const modifi
|
|||
new_node->SetNumWorkers(node->num_workers());
|
||||
// This method below assumes a DFS walk and from the first child to the last child.
|
||||
// Future: A more robust implementation that does not depend on the above assumption.
|
||||
parent_->AppendChild(new_node);
|
||||
RETURN_IF_NOT_OK(parent_->AppendChild(new_node));
|
||||
|
||||
// Then set this node to be a new parent to accept a copy of its next child
|
||||
parent_ = new_node.get();
|
||||
|
|
|
@ -31,7 +31,7 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr<DatasetNode> nod
|
|||
// Performs finder work for BuildVocabOp that has special rules about epoch control injection
|
||||
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, bool *const modified) {
|
||||
// The injection is at the child of the root node
|
||||
injection_point_ = node;
|
||||
injection_point_ = node->Children()[0];
|
||||
num_epochs_ = node->num_epochs();
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildSentenceVocabN
|
|||
Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) {
|
||||
// Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here.
|
||||
// Move the injection point to the child of this node.
|
||||
injection_point_ = node;
|
||||
injection_point_ = node->Children()[0];
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -71,12 +71,11 @@ Status EpochCtrlPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *cons
|
|||
|
||||
// The first injection logic is to check if we should inject the epoch control op as the root node.
|
||||
// Do not inject the op if the number of epochs is 1.
|
||||
std::shared_ptr<DatasetNode> parent = finder.injection_point();
|
||||
std::shared_ptr<DatasetNode> node = finder.injection_point();
|
||||
int32_t num_epochs = finder.num_epochs();
|
||||
if (num_epochs != 1 && parent != nullptr) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(parent->Children().size() == 1, "EpochCtrl must be injected on only one child.");
|
||||
if (num_epochs != 1 && node != nullptr) {
|
||||
auto epoch_ctrl_node = std::make_shared<EpochCtrlNode>(num_epochs);
|
||||
RETURN_IF_NOT_OK(parent->InsertBelow(epoch_ctrl_node));
|
||||
RETURN_IF_NOT_OK(node->InsertAbove(epoch_ctrl_node));
|
||||
}
|
||||
MS_LOG(INFO) << "Pre pass: Injection pass complete.";
|
||||
return Status::OK();
|
||||
|
|
|
@ -27,7 +27,7 @@ Status InputValidationPass::Visit(std::shared_ptr<DatasetNode> node, bool *const
|
|||
RETURN_IF_NOT_OK(node->ValidateParams());
|
||||
|
||||
// A data source node must be a leaf node
|
||||
if ((node->IsMappable() || node->IsNonMappable()) && !node->IsLeaf()) {
|
||||
if ((node->IsMappableDataSource() || node->IsNonMappableDataSource()) && !node->IsLeaf()) {
|
||||
std::string err_msg = node->Name() + " is a data source and must be a leaf node.";
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
|
|
|
@ -14,41 +14,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "minddata/dataset/engine/opt/pre/node_removal_pass.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/skip_node.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/take_node.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
NodeRemovalPass::RemovalNodes::RemovalNodes() : is_caching_(false) {}
|
||||
|
||||
// Identifies the subtree below this node as a cached descendant tree.
|
||||
// Identifies the subtree below this node as a cached descendant tree.
// \param[in] node The node being visited
// \param[inout] modified Always set to false; this visit does not change the tree
// \return Status::OK()
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<DatasetNode> node, bool *const modified) {
  *modified = false;
  if (node->IsCached()) {
    // Only report a cache when the visited node actually has one; the message used to be logged for every node.
    MS_LOG(INFO) << "Node removal pass: Operation with cache found, identified descendant tree.";
    is_caching_ = true;
  }
  return Status::OK();
}
|
||||
|
||||
// Resets the tracking of the cache within the tree
|
||||
// Post-visit hook: if cache tracking is active and the visited node is a leaf, HasCacheAbove() is invoked on it;
// the cache tracking flag is then cleared unconditionally.
// \param[in] node The node being visited
// \param[inout] modified Always set to false; this visit does not change the tree shape
// \return Status::OK()
Status NodeRemovalPass::RemovalNodes::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) {
  *modified = false;
  MS_LOG(INFO) << "Removal pass: Descendant walk is complete.";

  const bool leaf_below_cache = is_caching_ && node->IsLeaf();
  if (leaf_below_cache) {
    // Mark this leaf node to indicate it is a descendant of an operator with cache.
    // This is currently used in non-mappable dataset (leaf) nodes to not add a ShuffleOp in DatasetNode::Build().
    node->HasCacheAbove();
  }

  is_caching_ = false;
  return Status::OK();
}
|
||||
NodeRemovalPass::RemovalNodes::RemovalNodes() {}
|
||||
|
||||
// Perform RepeatNode removal check.
|
||||
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) {
|
||||
|
@ -59,12 +33,6 @@ Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<RepeatNode> node, bo
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
// Perform ShuffleNode removal check.
|
||||
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<ShuffleNode> node, bool *const modified) {
|
||||
*modified = false;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Perform SkipNode removal check.
|
||||
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<SkipNode> node, bool *const modified) {
|
||||
*modified = false;
|
||||
|
@ -95,7 +63,7 @@ Status NodeRemovalPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *co
|
|||
|
||||
// Then, execute the removal of any nodes that were set up for removal
|
||||
for (auto node : removal_nodes->nodes_to_remove()) {
|
||||
RETURN_IF_NOT_OK(node->Remove());
|
||||
RETURN_IF_NOT_OK(node->Drop());
|
||||
}
|
||||
MS_LOG(INFO) << "Pre pass: node removal pass complete.";
|
||||
return Status::OK();
|
||||
|
|
|
@ -31,7 +31,7 @@ class DatasetOp;
|
|||
/// nodes should be removed, and then removes them.
|
||||
class NodeRemovalPass : public IRTreePass {
|
||||
/// \class RemovalNodes
|
||||
/// \brief This is a NodePass who's job is to identify which nodes should be removed.
|
||||
/// \brief This is a NodePass whose job is to identify which nodes should be removed.
|
||||
/// It works in conjunction with the removal_pass.
|
||||
class RemovalNodes : public IRNodePass {
|
||||
public:
|
||||
|
@ -42,30 +42,12 @@ class NodeRemovalPass : public IRTreePass {
|
|||
/// \brief Destructor
|
||||
~RemovalNodes() = default;
|
||||
|
||||
/// \brief Identifies the subtree below this node as a cached descendant tree.
|
||||
/// \param[in] node The node being visited
|
||||
/// \param[inout] modified Indicator if the node was changed at all
|
||||
/// \return Status The status code returned
|
||||
Status Visit(std::shared_ptr<DatasetNode> node, bool *const modified) override;
|
||||
|
||||
/// \brief Resets the tracking of the cache within the tree
|
||||
/// \param[in] node The node being visited
|
||||
/// \param[inout] modified Indicator if the node was changed at all
|
||||
/// \return Status The status code returned
|
||||
Status VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) override;
|
||||
|
||||
/// \brief Perform RepeatNode removal check
|
||||
/// \param[in] node The node being visited
|
||||
/// \param[inout] modified Indicator if the node was changed at all
|
||||
/// \return Status The status code returned
|
||||
Status Visit(std::shared_ptr<RepeatNode> node, bool *const modified) override;
|
||||
|
||||
/// \brief Perform ShuffleNode removal check
|
||||
/// \param[in] node The node being visited
|
||||
/// \param[inout] modified Indicator if the node was changed at all
|
||||
/// \return Status The status code returned
|
||||
Status Visit(std::shared_ptr<ShuffleNode> node, bool *const modified) override;
|
||||
|
||||
/// \brief Perform SkipNode removal check
|
||||
/// \param[in] node The node being visited
|
||||
/// \param[inout] modified Indicator if the node was changed at all
|
||||
|
@ -83,7 +65,6 @@ class NodeRemovalPass : public IRTreePass {
|
|||
std::vector<std::shared_ptr<DatasetNode>> nodes_to_remove() { return nodes_to_remove_; }
|
||||
|
||||
private:
|
||||
bool is_caching_;
|
||||
std::vector<std::shared_ptr<DatasetNode>> nodes_to_remove_;
|
||||
};
|
||||
|
||||
|
|
|
@ -187,8 +187,10 @@ Status TreeAdapter::Compile(std::shared_ptr<DatasetNode> input_ir, int32_t num_e
|
|||
|
||||
tree_state_ = kCompileStateOptimized;
|
||||
MS_LOG(INFO) << "Plan after optimization:" << '\n' << *root_ir << '\n';
|
||||
// Remember the root node
|
||||
root_ir_ = root_ir;
|
||||
|
||||
RETURN_IF_NOT_OK(Build(root_ir, num_epochs));
|
||||
RETURN_IF_NOT_OK(Build(root_ir_, num_epochs));
|
||||
tree_state_ = kCompileStateReady;
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
@ -46,6 +46,9 @@ class TreeAdapter {
|
|||
// the Execution tree.
|
||||
Status Compile(std::shared_ptr<DatasetNode> root_ir, int32_t num_epochs = -1);
|
||||
|
||||
// Return the root node of the IR after cloned from the parsed IR tree
|
||||
std::shared_ptr<DatasetNode> RootIRNode() const { return root_ir_; }
|
||||
|
||||
// This is the main method TreeConsumer uses to interact with TreeAdapter
|
||||
// 1. GetNext will Launch() the ExeTree on its first call by iterator (tree is already prepared)
|
||||
// 2. GetNext will return empty row when eoe/eof is obtained
|
||||
|
@ -87,6 +90,7 @@ class TreeAdapter {
|
|||
|
||||
std::unique_ptr<DataBuffer> cur_db_;
|
||||
std::unordered_map<std::string, int32_t> column_name_map_;
|
||||
std::shared_ptr<DatasetNode> root_ir_;
|
||||
std::unique_ptr<ExecutionTree> tree_; // current connector capacity of root op, used for profiling
|
||||
bool optimize_; // Flag to enable optional optimization pass
|
||||
std::shared_ptr<DatasetIteratorTracing> tracing_; // trace profiling data
|
||||
|
|
|
@ -131,13 +131,14 @@ SET(DE_UT_SRCS
|
|||
to_float16_op_test.cc
|
||||
tokenizer_op_test.cc
|
||||
treap_test.cc
|
||||
tree_modifying_function_test.cc
|
||||
trucate_pair_test.cc
|
||||
type_cast_op_test.cc
|
||||
weighted_random_sampler_test.cc
|
||||
zip_op_test.cc
|
||||
)
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
if(ENABLE_PYTHON)
|
||||
set(DE_UT_SRCS
|
||||
${DE_UT_SRCS}
|
||||
filter_op_test.cc
|
||||
|
@ -145,13 +146,18 @@ if (ENABLE_PYTHON)
|
|||
voc_op_test.cc
|
||||
sentence_piece_vocab_op_test.cc
|
||||
)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
add_executable(de_ut_tests ${DE_UT_SRCS})
|
||||
|
||||
set_target_properties(de_ut_tests PROPERTIES INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/../lib64")
|
||||
|
||||
target_link_libraries(de_ut_tests PRIVATE _c_dataengine pybind11::embed ${GTEST_LIBRARY} ${SECUREC_LIBRARY} ${SLOG_LIBRARY})
|
||||
target_link_libraries(de_ut_tests PRIVATE
|
||||
_c_dataengine pybind11::embed
|
||||
${GTEST_LIBRARY}
|
||||
${SECUREC_LIBRARY}
|
||||
${SLOG_LIBRARY}
|
||||
)
|
||||
|
||||
gtest_discover_tests(de_ut_tests WORKING_DIRECTORY ${Project_DIR}/tests/dataset)
|
||||
|
||||
|
|
|
@ -0,0 +1,567 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "common/common.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/skip_node.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/take_node.h"
|
||||
#include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
|
||||
#include "minddata/dataset/include/datasets.h"
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
|
||||
// Test fixture for the IR tree-modifying functions of DatasetNode
// (AppendChild, InsertChildAt, InsertAbove, Drop).
// It adds no state of its own, so the implicitly generated default constructor is sufficient.
class MindDataTestTreeModifying : public UT::DatasetOpTesting {};
|
||||
|
||||
// Verify that AppendChild() attaches the given node as the last child of its new parent.
TEST_F(MindDataTestTreeModifying, AppendChild) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-AppendChild";
  /*
   * Input tree:
   *      ds4
   *     /   \
   *   ds3   ds2
   *    |
   *   ds1
   *
   * ds4->AppendChild(ds6) yields this tree
   *
   *      _ ds4 _
   *     /   |   \
   *   ds3  ds2  ds6
   *    |
   *   ds1
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds3 = ds1->Take(10);
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  Status rc;

  std::shared_ptr<DatasetNode> root = ds4->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  // You can inspect the plan by sending *ir_tree->RootIRNode() to std::cout
  std::shared_ptr<DatasetNode> node_to_insert = ds6->IRNode();
  rc = ds4_node->AppendChild(node_to_insert);
  // Fatal checks: indexing Children()[2] below requires that the append really happened.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(ds4_node->Children().size() == 3);
  EXPECT_TRUE(ds4_node->Children()[2] == node_to_insert);
  EXPECT_TRUE(node_to_insert->Parent() == ds4_node.get());
}
|
||||
|
||||
// Verify InsertChildAt() for every valid position (middle, front, end) of an n-ary operator's children.
TEST_F(MindDataTestTreeModifying, InsertChildAt01) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertChildAt01";
  /*
   * Input tree:
   *      ds4
   *     /   \
   *   ds3   ds2
   *    |     |
   *   ds1   ds5
   *
   * Case 1: ds4->InsertChildAt(1, ds6) yields this tree
   *
   *      _ ds4 _
   *     /   |   \
   *   ds3  ds6  ds2
   *    |         |
   *   ds1       ds5
   *
   * Case 2: ds4->InsertChildAt(0, ds6) yields this tree
   *
   *      _ ds4 _
   *     /   |   \
   *   ds6  ds3  ds2
   *         |    |
   *        ds1  ds5
   *
   * Case 3: ds4->InsertChildAt(2, ds6) yields this tree
   *
   *      _ ds4 _
   *     /   |   \
   *   ds3  ds2  ds6
   *    |    |
   *   ds1  ds5
   *
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds3 = ds1->Take(10);
  std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ds5->Repeat(4);
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  Status rc;
  std::shared_ptr<DatasetNode> root = ds4->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();

  // Case 1:
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<Dataset> ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<DatasetNode> ds6_to_insert = ds6->IRNode();
  std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
  rc = ds4_node->InsertChildAt(1, ds6_to_insert);
  // Fatal checks: Children()[2] only exists if the insertion really happened.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(ds4_node->Children().size() == 3);
  EXPECT_TRUE(ds4_node->Children()[1] == ds6_to_insert);
  EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
  EXPECT_TRUE(ds4_node->Children()[2] == ds2_node);

  // Case 2:
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  ds6_to_insert = ds6->IRNode();
  std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
  rc = ds4_node->InsertChildAt(0, ds6_to_insert);
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(ds4_node->Children().size() == 3);
  EXPECT_TRUE(ds4_node->Children()[0] == ds6_to_insert);
  EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
  EXPECT_TRUE(ds4_node->Children()[1] == ds3_node);

  // Case 3:
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  ds6_to_insert = ds6->IRNode();
  rc = ds4_node->InsertChildAt(2, ds6_to_insert);
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(ds4_node->Children().size() == 3);
  EXPECT_TRUE(ds4_node->Children()[2] == ds6_to_insert);
  EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
}
|
||||
|
||||
// Verify that InsertChildAt() rejects out-of-range positions and leaves the existing children in place.
TEST_F(MindDataTestTreeModifying, InsertChildAt04) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertChildAt04";

  /*
   * Input tree:
   *      ds4
   *     /   \
   *   ds3   ds2
   *    |     |
   *   ds1   ds5
   *
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds3 = ds1->Take(10);
  std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ds5->Repeat(4);
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  Status rc;
  std::shared_ptr<DatasetNode> root = ds4->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();

  // Case 4: ds4->InsertChildAt(3, ds6) raises an error
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<Dataset> ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<DatasetNode> ds6_to_insert = ds6->IRNode();
  std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
  std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
  rc = ds4_node->InsertChildAt(3, ds6_to_insert);
  EXPECT_NE(rc, Status::OK());
  // The original children must still be where they were.
  EXPECT_TRUE(ds4_node->Children()[0] == ds3_node);
  EXPECT_TRUE(ds4_node->Children()[1] == ds2_node);

  // Case 5: ds4->InsertChildAt(-1, ds6) raises an error
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  ds6_to_insert = ds6->IRNode();
  ds3_node = ds4_node->Children()[0];
  ds2_node = ds4_node->Children()[1];
  rc = ds4_node->InsertChildAt(-1, ds6_to_insert);
  EXPECT_NE(rc, Status::OK());
  // The original children must still be where they were.
  EXPECT_TRUE(ds4_node->Children()[0] == ds3_node);
  EXPECT_TRUE(ds4_node->Children()[1] == ds2_node);
}
|
||||
|
||||
// Verify InsertAbove() when the new node is placed between an n-ary parent and its first child.
TEST_F(MindDataTestTreeModifying, InsertAbove01) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertAbove01";
  /*
   * Insert the input <node> above this node
   * Input tree:
   *       ds4
   *      /   \
   *     ds3  ds2
   *      |
   *     ds1
   *
   * Case 1: If we want to insert a new node ds5 between ds4 and ds3, use
   *           ds3->InsertAbove(ds5)
   *
   *       ds4
   *      /   \
   *     ds5  ds2
   *      |
   *     ds3
   *      |
   *     ds1
   *
   * Case 2: Likewise, ds2->InsertAbove(ds6) yields
   *
   *       ds4
   *      /   \
   *     ds3  ds6
   *      |    |
   *     ds1  ds2
   *
   * Case 3: We can insert a new node between ds3 and ds1 by ds1->InsertAbove(ds7)
   *
   *       ds4
   *      /   \
   *     ds3  ds2
   *      |
   *     ds7
   *      |
   *     ds1
   *
   * This test covers Case 1.
   */
  // Case 1
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds3 = ds1->Take(10);
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  Status rc;

  std::shared_ptr<DatasetNode> root = ds4->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
  std::shared_ptr<SkipNode> ds5_to_insert = std::make_shared<SkipNode>(nullptr, 1);
  rc = ds3_node->InsertAbove(ds5_to_insert);
  // Fatal checks: ds5 was created without children, so Children()[0] only exists if the insertion happened.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(!ds5_to_insert->Children().empty());
  EXPECT_TRUE(ds5_to_insert->Children()[0] == ds3_node);
  EXPECT_TRUE(ds3_node->Parent() == ds5_to_insert.get());
  EXPECT_TRUE(ds4_node->Children()[0] == ds5_to_insert);
  EXPECT_TRUE(ds5_to_insert->Parent() == ds4_node.get());
}
|
||||
|
||||
// Verify InsertAbove() when the new node is placed between an n-ary parent and its second child.
TEST_F(MindDataTestTreeModifying, InsertAbove02) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertAbove02";

  /*
   * Case 2: with the input tree
   *
   *       ds4
   *      /   \
   *     ds3  ds2
   *      |
   *     ds1
   *
   * ds2->InsertAbove(ds6) yields
   *
   *       ds4
   *      /   \
   *     ds3  ds6
   *      |    |
   *     ds1  ds2
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds3 = ds1->Take(10);
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2 + ds3;
  Status rc;

  std::shared_ptr<DatasetNode> root = ds4->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
  std::shared_ptr<TakeNode> ds6_to_insert = std::make_shared<TakeNode>(nullptr, 12);
  rc = ds2_node->InsertAbove(ds6_to_insert);
  // Fatal checks: ds6 was created without children, so Children()[0] only exists if the insertion happened.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(!ds6_to_insert->Children().empty());
  EXPECT_TRUE(ds6_to_insert->Children()[0] == ds2_node);
  EXPECT_TRUE(ds2_node->Parent() == ds6_to_insert.get());
  EXPECT_TRUE(ds4_node->Children()[1] == ds6_to_insert);
  EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
}
|
||||
|
||||
// Verify InsertAbove() when the new node is placed between a unary parent and its leaf child.
TEST_F(MindDataTestTreeModifying, InsertAbove03) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertAbove03";

  /*
   * Case 3: with the input tree
   *
   *       ds4
   *      /   \
   *     ds3  ds2
   *      |
   *     ds1
   *
   * ds1->InsertAbove(ds7) yields
   *
   *       ds4
   *      /   \
   *     ds3  ds2
   *      |
   *     ds7
   *      |
   *     ds1
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds3 = ds1->Take(10);
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  Status rc;

  std::shared_ptr<DatasetNode> root = ds4->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
  std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
  std::shared_ptr<DatasetNode> ds1_node = ds3_node->Children()[0];
  std::shared_ptr<RepeatNode> ds7_to_insert = std::make_shared<RepeatNode>(nullptr, 3);
  rc = ds1_node->InsertAbove(ds7_to_insert);
  // The returned status was previously ignored; check it like the other InsertAbove tests do.
  // Fatal checks: ds7 was created without children, so Children()[0] only exists if the insertion happened.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(!ds7_to_insert->Children().empty());
  EXPECT_TRUE(ds7_to_insert->Children()[0] == ds1_node);
  EXPECT_TRUE(ds1_node->Parent() == ds7_to_insert.get());
  EXPECT_TRUE(ds3_node->Children()[0] == ds7_to_insert);
  EXPECT_TRUE(ds7_to_insert->Parent() == ds3_node.get());
}
|
||||
|
||||
// Verify Drop() on a leaf node (Case 1) and on a node with exactly one child and no sibling (Case 2).
TEST_F(MindDataTestTreeModifying, Drop01) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop01";
  /*
   * Drop() detaches this node from the tree it is in. Calling Drop() from a standalone node is a no-op.
   *
   * Input tree (as built by the code below):
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |      |
   *        ds8    ds4
   *         |    /   \
   *        ds7  ds3  ds2
   *
   * Case 1: When the node has no child and no sibling, Drop() detaches the node from its tree.
   *
   *   ds7->Drop() yields the tree below:
   *
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |      |
   *        ds8    ds4
   *              /   \
   *            ds3   ds2
   *
   * Case 2: When the node has one child and no sibling, Drop() detaches the node from its tree and the node's child
   *         becomes its parent's child.
   *
   *   ds8->Drop() yields the tree below:
   *
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |      |
   *        ds7    ds4
   *              /   \
   *            ds3   ds2
   *
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds8 = ds7->Take(20);
  std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
  std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  std::shared_ptr<Dataset> ds6 = ds4->Take(13);
  std::shared_ptr<Dataset> ds10 = ds6 + ds9;
  Status rc;

  std::shared_ptr<DatasetNode> root = ds10->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();

  // Case 1
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds10
  std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds9_node = ds10_node->Children()[0];
  std::shared_ptr<DatasetNode> ds8_node = ds9_node->Children()[0];
  std::shared_ptr<DatasetNode> ds7_node = ds8_node->Children()[0];
  rc = ds7_node->Drop();
  EXPECT_EQ(rc, Status::OK());
  // ds8 becomes a childless node
  EXPECT_TRUE(ds8_node->Children().empty());
  EXPECT_TRUE(ds7_node->Parent() == nullptr);
  EXPECT_TRUE(ds7_node->Children().empty());

  // Case 2
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds10
  ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  ds9_node = ds10_node->Children()[0];
  ds8_node = ds9_node->Children()[0];
  ds7_node = ds8_node->Children()[0];
  rc = ds8_node->Drop();
  // Fatal checks: ds9's Children()[0] is read below, so the drop must have re-parented ds7.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(!ds9_node->Children().empty());
  // ds7 becomes a child of ds9
  EXPECT_TRUE(ds9_node->Children()[0] == ds7_node);
  EXPECT_TRUE(ds7_node->Parent() == ds9_node.get());
  EXPECT_TRUE(ds8_node->Parent() == nullptr);
  EXPECT_TRUE(ds8_node->Children().empty());
}
|
||||
|
||||
// Verify that Drop() fails when an n-ary node's children would have to be adopted by a unary parent.
TEST_F(MindDataTestTreeModifying, Drop03) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop03";
  /* Case 3: When the node has more than one child and no sibling, Drop() detaches the node from its tree and the node's
   *         children become its parent's children.
   *
   * When the input tree is
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |      |
   *        ds8    ds4
   *         |    /   \
   *        ds7  ds3  ds2
   *
   *
   *   ds4->Drop() will raise an error because we cannot add the children of an n-ary operator (ds4) to a unary operator
   *   (ds6).
   *
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds8 = ds7->Take(20);
  std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
  std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  std::shared_ptr<Dataset> ds6 = ds4->Take(13);
  std::shared_ptr<Dataset> ds10 = ds6 + ds9;
  Status rc;

  std::shared_ptr<DatasetNode> root = ds10->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds10
  std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds6_node = ds10_node->Children()[1];
  std::shared_ptr<DatasetNode> ds4_node = ds6_node->Children()[0];
  rc = ds4_node->Drop();
  EXPECT_NE(rc, Status::OK());
}
|
||||
|
||||
// Verify Drop() on a childless node that has siblings: the remaining siblings shift left.
TEST_F(MindDataTestTreeModifying, Drop04) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop04";
  /* Case 4: When the node has no child but has siblings, Drop() detaches the node from its tree and its siblings will be
   *         squeezed left.
   *
   * Input tree:
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |   /  |  \
   *        ds8 ds5 ds4 ds1
   *         |     /  \
   *        ds7  ds3  ds2
   *
   *   ds5->Drop() yields the tree below:
   *
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |     /  \
   *        ds8  ds4  ds1
   *         |   /  \
   *        ds7 ds3 ds2
   *
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds8 = ds7->Take(20);
  std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
  std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  // Note the child order: ds1 is placed after (ds5, ds4).
  std::shared_ptr<Dataset> ds6 = ds1->Concat({ds5, ds4});
  std::shared_ptr<Dataset> ds10 = ds6 + ds9;
  Status rc;

  std::shared_ptr<DatasetNode> root = ds10->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds10
  std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds6_node = ds10_node->Children()[1];
  std::shared_ptr<DatasetNode> ds5_node = ds6_node->Children()[0];
  std::shared_ptr<DatasetNode> ds4_node = ds6_node->Children()[1];
  EXPECT_TRUE(ds5_node->IsDataSource());
  EXPECT_TRUE(ds6_node->IsNaryOperator());
  rc = ds5_node->Drop();
  // Fatal checks: ds6's Children()[0] is read below, so make sure the drop left the expected two children.
  ASSERT_EQ(rc, Status::OK());
  ASSERT_TRUE(ds6_node->Children().size() == 2);
  EXPECT_TRUE(ds6_node->Children()[0] == ds4_node);
  EXPECT_TRUE(ds4_node->Parent() == ds6_node.get());
  EXPECT_TRUE(ds5_node->Parent() == nullptr);
  EXPECT_TRUE(ds5_node->Children().empty());
}
|
||||
|
||||
// Verify that Drop() fails on a node that has more than one child and also has siblings.
TEST_F(MindDataTestTreeModifying, Drop05) {
  MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop05";
  /*
   * Case 5: When the node has more than one child and more than one sibling, Drop() will raise an error.
   *         If we want to drop ds4 from the input tree, ds4->Drop() will not work. We will have to do it
   *         with a combination of Drop(), InsertChildAt()
   *
   * Input tree:
   *           ds10
   *          /    \
   *        ds9    ds6
   *         |   /  |  \
   *        ds8 ds5 ds4 ds1
   *         |     /  \
   *        ds7  ds3  ds2
   *
   * If we want to form this tree below:
   *
   *           ds10
   *          /    \
   *        ds9    ds6_____
   *         |   /  |   |  \
   *        ds8 ds5 ds3 ds2 ds1
   *         |
   *        ds7
   *
   * This test only checks that ds4->Drop() is rejected.
   */
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds8 = ds7->Take(20);
  std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
  std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  // Note the child order: ds3 becomes the first child of ds4 and ds2 the second.
  std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3});
  std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
  // Note the child order: ds1 is placed after (ds5, ds4).
  std::shared_ptr<Dataset> ds6 = ds1->Concat({ds5, ds4});
  std::shared_ptr<Dataset> ds10 = ds6 + ds9;
  Status rc;

  std::shared_ptr<DatasetNode> root = ds10->IRNode();
  auto ir_tree = std::make_shared<TreeAdapter>();
  rc = ir_tree->Compile(root);  // Compile adds a new RootNode to the top of the tree
  // Fatal check: the tree is dereferenced below, which is only safe if Compile succeeded.
  ASSERT_EQ(rc, Status::OK());
  // Descend two levels as Compile adds the root node and the epochctrl node on top of ds10
  std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
  std::shared_ptr<DatasetNode> ds6_node = ds10_node->Children()[1];
  std::shared_ptr<DatasetNode> ds4_node = ds6_node->Children()[1];
  rc = ds4_node->Drop();
  EXPECT_NE(rc, Status::OK());
}
|
Loading…
Reference in New Issue