Tested with new test cases and all dataset UTs passed

This commit is contained in:
Nat Sutyanyong 2021-01-08 22:28:43 -05:00 committed by Lixia Chen
parent a575af7198
commit e28fb6ce4d
14 changed files with 883 additions and 167 deletions

View File

@ -35,6 +35,7 @@ ConcatNode::ConcatNode(const std::vector<std::shared_ptr<DatasetNode>> &datasets
: sampler_(sampler),
children_flag_and_nums_(children_flag_and_nums),
children_start_end_index_(children_start_end_index) {
nary_op_ = true;
for (auto const &child : datasets) AddChild(child);
}

View File

@ -221,15 +221,20 @@ std::shared_ptr<DatasetNode> DatasetNode::SetNumWorkers(int32_t num_workers) {
return shared_from_this();
}
DatasetNode::DatasetNode() : cache_(nullptr), parent_(nullptr), children_({}), dataset_size_(-1) {
DatasetNode::DatasetNode()
: cache_(nullptr),
parent_(nullptr),
children_({}),
dataset_size_(-1),
mappable_(kNotADataSource),
nary_op_(false),
descendant_of_cache_(false) {
// Fetch some default value from config manager
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
num_workers_ = cfg->num_parallel_workers();
rows_per_buffer_ = cfg->rows_per_buffer();
connector_que_size_ = cfg->op_connector_size();
worker_connector_size_ = cfg->worker_connector_size();
mappable_ = kNotADataSource;
descendant_of_cache_ = false;
}
std::string DatasetNode::PrintColumns(const std::vector<std::string> &columns) const {
@ -283,95 +288,268 @@ void DatasetNode::AddChild(std::shared_ptr<DatasetNode> child) {
}
}
// Add the input node to be the next child of this node
// This function is used in doing a deep copy of the IR tree built by parsing the user code.
// This function assumes we walk the tree in DFS left-to-right.
// This is a temporary function to be replaced later by a set of better tree operations.
void DatasetNode::AppendChild(std::shared_ptr<DatasetNode> child) {
if (child != nullptr) {
if (child->parent_ != nullptr) {
MS_LOG(WARNING) << "Adding " + child->Name() + " to " + Name() + " but it already has a parent";
}
children_.push_back(child);
child->parent_ = this;
}
/*
* AppendChild(<node>) appending <node> as the last child of this node. The new node must have no parent.
*
* Input tree:
* ds4
* / \
* ds3 ds2
* |
* ds1
*
* ds4->AppendChild(ds6) yields this tree
*
* _ ds4 _
* / | \
* ds3 ds2 ds6
* |
* ds1
*
*/
Status DatasetNode::AppendChild(std::shared_ptr<DatasetNode> child) {
CHECK_FAIL_RETURN_UNEXPECTED(child != nullptr, "Node to append must not be a null pointer.");
CHECK_FAIL_RETURN_UNEXPECTED(child->parent_ == nullptr, "Node to append must have no parent.");
CHECK_FAIL_RETURN_UNEXPECTED((IsUnaryOperator() && Children().empty()) || IsNaryOperator(),
"This node must be a unary operator with no child or an n-ary operator");
children_.push_back(child);
child->parent_ = this;
return Status::OK();
}
// Add a node as a parent, node's parent needs to be empty (future use)
Status DatasetNode::InsertAbove(std::shared_ptr<DatasetNode> node) {
CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Inserted node must not be a null pointer.");
/*
* InsertChildAt(<pos>, <node>) inserts the <node> to be at the <pos> index of the vector of its child nodes.
* As in the convention of C++, <pos> starts at position 0.
* If the <pos> is a negative number or larger than the size of the vector minus one, an error is raised.
*/
Status DatasetNode::InsertChildAt(int32_t pos, std::shared_ptr<DatasetNode> child) {
CHECK_FAIL_RETURN_UNEXPECTED(pos > -1 && pos <= children_.size(), "Position must in the range of [0, size]");
CHECK_FAIL_RETURN_UNEXPECTED(child != nullptr, "Node to insert must not be a null pointer.");
CHECK_FAIL_RETURN_UNEXPECTED(child->parent_ == nullptr, "Node to insert must have no parent.");
CHECK_FAIL_RETURN_UNEXPECTED((IsUnaryOperator() && Children().empty()) || IsNaryOperator(),
"This node must be a unary operator with no child or an n-ary operator");
children_.insert(children_.begin() + pos, child);
child->parent_ = this;
return Status::OK();
}
if (node->parent_ != nullptr) {
DatasetNode *parent = node->parent_;
for (auto i = parent->children_.size() - 1; i >= 0; --i) {
if (parent->children_[i] == node) {
parent->children_[i] = static_cast<std::shared_ptr<DatasetNode>>(this);
/*
* Insert the input <node> above this node
* Input tree:
* ds4
* / \
* ds3 ds2
* |
* ds1
*
* Case 1: If we want to insert a new node ds5 between ds4 and ds3, use
* ds3->InsertAbove(ds5)
*
* ds4
* / \
* ds5 ds2
* |
* ds3
* |
* ds1
*
* Case 2: Likewise, ds2->InsertAbove(ds6) yields
*
* ds4
* / \
* ds3 ds6
* | |
* ds1 ds2
*
* Case 3: We can insert a new node between ds3 and ds1 by ds1->InsertAbove(ds7)
*
* ds4
* / \
* ds3 ds2
* |
* ds7
* |
* ds1
*
* InsertAbove() cannot use on the root node of a tree.
*/
Status DatasetNode::InsertAbove(std::shared_ptr<DatasetNode> node) {
CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Node to insert must not be a null pointer.");
CHECK_FAIL_RETURN_UNEXPECTED(node->parent_ == nullptr, "Node to insert must have no parent.");
CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "This node must not be the root or a node without parent.");
auto parent = parent_;
// The following fields of these three nodes are changed in this function:
// 1. parent->children_
// 2. node->parent_ and node->children_
// 3. this->parent_
auto current_node_itr = std::find(parent_->children_.begin(), parent_->children_.end(), shared_from_this());
*current_node_itr = node;
node->parent_ = parent;
node->children_.push_back(shared_from_this());
parent_ = node.get();
return Status::OK();
}
/*
* Drop() detaches this node from the tree it is in. Calling Drop() from a standalone node is a no-op.
*
* Input tree:
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* Case 1: When the node has no child and no sibling, Drop() detaches the node from its tree.
*
* ds7->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* / \
* ds3 ds2
*
* Case 2: When the node has one child and no sibling, Drop() detaches the node from its tree and the node's child
* becomes its parent's child.
*
* ds8->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / | \
* ds7 ds5 ds4 ds1
* / \
* ds3 ds2
*
* Case 3: When the node has more than one child and no sibling, Drop() detaches the node from its tree and the node's
* children become its parent's children.
*
* When the input tree is
*
* ds10
* / \
* ds9 ds6
* | |
* ds8 ds4
* | / \
* ds7 ds3 ds2
*
* ds4->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / \
* ds8 ds3 ds2
* |
* ds7
*
* But if ds6 is not an n-ary operator, ds4->Drop() will raise an error because we cannot add the children of an
* n-ary operator (ds4) to a unary operator (ds6).
*
* Case 4: When the node has no child but has siblings, Drop() detaches the node from its tree and its siblings will be
* squeezed left.
*
* Input tree:
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* ds5->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / \
* ds8 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* Case 5: When the node has more than one child and more than one sibling, Drop() will raise an error.
* If we want to drop ds4 from the input tree, ds4->Drop() will not work. We will have to do it
* with a combination of Drop(), InsertChildAt()
*
* Input tree:
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* If we want to form this tree below:
*
* ds10
* / \
* ds9 ds6_____
* | / | | \
* ds8 ds5 ds3 ds2 ds1
* |
* ds7
*
*/
Status DatasetNode::Drop() {
CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "This node to drop must not be the root or a node without parent.");
CHECK_FAIL_RETURN_UNEXPECTED(!(IsNaryOperator() && parent_->IsUnaryOperator()),
"Trying to drop an n-ary operator that is a child of a unary operator");
CHECK_FAIL_RETURN_UNEXPECTED(!(children_.size() > 1 && parent_->children_.size() > 1),
"This node to drop must not have more than one child and more than one sibling.");
CHECK_FAIL_RETURN_UNEXPECTED(children_.size() == 0 || parent_->children_.size() == 1,
"If this node to drop has children, it must be its parent's only child.");
if (parent_->children_.size() == 1) {
auto parent = parent_;
// Case 2: When the node has one child and no sibling, Drop() detaches the node from its tree and the node's child
// becomes its parent's child.
// This is the most common use case.
if (children_.size() == 1) {
auto child = children_[0];
// Move its child to be its parent's child
parent->children_[0] = child;
child->parent_ = parent;
} else if (children_.empty()) {
// Case 1: When the node has no child and no sibling, Drop() detaches the node from its tree.
// Remove this node from its parent's child
parent_->children_.clear();
} else if (children_.size() > 1) {
// Case 3: When the node has more than one child and no sibling, Drop() detaches the node from its tree and
// the node's children become its parent's children.
// Remove this node from its parent's child
parent->children_.clear();
// Move its child to be its parent's child
for (auto &child : children_) {
parent->children_.push_back(child);
child->parent_ = parent;
}
}
// And mark itself as an orphan
parent_ = nullptr;
children_.clear();
} else if (children_.empty() && parent_->children_.size() > 1) {
// Case 4: When the node has no child but has siblings, Drop() detaches the node from its tree and its siblings will
// be squeezed left.
auto parent = parent_;
// Remove this node from its parent's child
parent->children_.erase(std::remove(parent->children_.begin(), parent->children_.end(), shared_from_this()),
parent->children_.end()); // removal using "erase remove idiom"
// And mark itself as an orphan
parent_ = nullptr;
children_.clear();
} else {
RETURN_STATUS_UNEXPECTED("Internal error: we should not reach here.");
}
children_.push_back(node);
node->parent_ = this;
return Status::OK();
}
// Insert a node as a child of this node
// This node's children become the children of the inserted node.
Status DatasetNode::InsertBelow(std::shared_ptr<DatasetNode> node) {
CHECK_FAIL_RETURN_UNEXPECTED(node != nullptr, "Inserted node must not be a null pointer.");
CHECK_FAIL_RETURN_UNEXPECTED(node->children_.empty(), "Inserted node must not have any children.");
CHECK_FAIL_RETURN_UNEXPECTED(node->parent_ == nullptr, "Inserted node must not have a parent.");
for (auto child : children_) {
node->children_.push_back(child);
child->parent_ = node.get();
}
// Then establish the new parent-child relationship with the new parent.
children_.clear();
children_.push_back(node);
node->parent_ = this;
return Status::OK();
}
// Insert a node as a child next to this node (future use)
Status DatasetNode::InsertAfter(std::shared_ptr<DatasetNode> node) {
CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "This node must have a parent.");
CHECK_FAIL_RETURN_UNEXPECTED(node->parent_ == nullptr, "Inserted node must not have a parent.");
auto size = parent_->children_.size();
// Duplicate the last child to increase the size by 1
parent_->children_.push_back(parent_->children_[size - 1]);
// Shift each child to its right until we found the insertion point, then insert the input node
bool found = false;
for (auto i = parent_->children_.size() - 2; i >= 0; --i) {
if (parent_->children_[i].get() != this) {
parent_->children_[i + 1] = parent_->children_[i];
} else {
parent_->children_[i + 1] = node;
node->parent_ = parent_;
found = true;
break;
}
}
CHECK_FAIL_RETURN_UNEXPECTED(!found, "Insertion point not found.");
return Status::OK();
}
// Remove this node from its parent. Add the child of this node to its parent.
// for now, this remove is limited to node with a single child or no child
Status DatasetNode::Remove() {
CHECK_FAIL_RETURN_UNEXPECTED(parent_ != nullptr, "Cannot remove root or a node without parent.");
CHECK_FAIL_RETURN_UNEXPECTED(children_.size() < 2, "Cannot remove node with more than 1 child.");
if (children_.empty()) { // I am a leaf node, remove me from my parent's children list
parent_->children_.erase(std::remove(parent_->children_.begin(), parent_->children_.end(), shared_from_this()),
parent_->children_.end()); // removal using "erase remove idiom"
} else { // replace my position in my parent's children list with my single child
auto itr = std::find(parent_->children_.begin(), parent_->children_.end(), shared_from_this());
CHECK_FAIL_RETURN_UNEXPECTED(itr != parent_->children_.end(), "I am not in my parent's children list.");
children_[0]->parent_ = parent_; // set my single child's parent ptr to my parent
*itr = std::move(children_[0]); // replace me in my parent's children list with my single child
children_.clear(); // release my single child from my children list
}
parent_ = nullptr;
return Status::OK();
}

View File

@ -140,7 +140,7 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
/// \param out - The output stream to write output to
virtual void Print(std::ostream &out) const = 0;
/// \brief Pure virtual function to make a new copy of the node
/// \brief Pure virtual function to clone a new copy of the node
/// \return The new copy of the node
virtual std::shared_ptr<DatasetNode> Copy() = 0;
@ -187,26 +187,19 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
/// \return The parent node (of a node from a cloned IR tree)
DatasetNode *const Parent() const { return parent_; }
/// \brief Establish a parent-child relationship between this node and the input node.
/// Used when building the IR tree.
void AddChild(std::shared_ptr<DatasetNode> child);
/// \brief Establish a parent-child relationship between this node and the input node.
/// Used during the cloning of the user-input IR tree (temporary use)
void AppendChild(std::shared_ptr<DatasetNode> child);
Status AppendChild(std::shared_ptr<DatasetNode> child);
/// \brief Establish the child-parent relationship between this node and the input node (future use)
/// \brief Insert the input <node> above this node
Status InsertAbove(std::shared_ptr<DatasetNode> node);
/// \brief Insert the input node below this node. This node's children becomes the children of the inserted node.
Status InsertBelow(std::shared_ptr<DatasetNode> node);
/// \brief Add the input node as the next sibling (future use)
Status InsertAfter(std::shared_ptr<DatasetNode> node);
Status InsertChildAt(int32_t pos, std::shared_ptr<DatasetNode> node);
/// \brief detach this node from its parent, add its child (if any) to its parent
/// \return error code, return error if node has more than 1 children
Status Remove();
Status Drop();
/// \brief Check if this node has cache
/// \return True if the data of this node will be cached
@ -216,13 +209,25 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
/// \return True if this is a leaf node.
const bool IsLeaf() const { return children_.empty(); }
/// \brief Check if this node is a unary operator node.
/// \return True if this node is semantically a unary operator node
const bool IsUnaryOperator() const { return (mappable_ == kNotADataSource && !nary_op_); }
/// \brief Check if this node is a n-ary operator node.
/// \return True if this node is semantically a n-ary operator node
const bool IsNaryOperator() const { return (mappable_ == kNotADataSource && nary_op_); }
/// \brief Check if this node is a mappable dataset. Only applicable to leaf nodes
/// \return True if this node is a mappable dataset
const bool IsMappable() const { return (mappable_ == kMappableSource); }
const bool IsMappableDataSource() const { return (mappable_ == kMappableSource); }
/// \brief Check if this node is a non-mappable dataset. Only applicable to leaf nodes
/// \return True if this node is a non-mappable dataset
const bool IsNonMappable() const { return (mappable_ == kNonMappableSource); }
const bool IsNonMappableDataSource() const { return (mappable_ == kNonMappableSource); }
/// \brief Check if this node is a data source node.
/// \return True if this node is a data source node
const bool IsDataSource() const { return (mappable_ == kMappableSource || mappable_ == kNonMappableSource); }
/// \brief Check if this node is not a data source node.
/// \return True if this node is not a data source node
@ -285,11 +290,15 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
int32_t rows_per_buffer_;
int32_t connector_que_size_;
int32_t worker_connector_size_;
// Establish a parent-child relationship between this node and the input node.
// Used only in the constructor of the class and its derived classes.
void AddChild(std::shared_ptr<DatasetNode> child);
std::string PrintColumns(const std::vector<std::string> &columns) const;
Status AddCacheOp(std::vector<std::shared_ptr<DatasetOp>> *node_ops);
void PrintNode(std::ostream &out, int *level) const;
enum DataSource { kNotADataSource = 0, kNonMappableSource = 1, kMappableSource = 2 };
enum DataSource mappable_;
bool nary_op_; // an indicator of whether the current node supports multiple children, true for concat/zip node
bool descendant_of_cache_;
};

View File

@ -28,6 +28,7 @@ namespace mindspore {
namespace dataset {
ZipNode::ZipNode(const std::vector<std::shared_ptr<DatasetNode>> &datasets) {
nary_op_ = true;
for (auto const &child : datasets) AddChild(child);
}

View File

@ -135,7 +135,7 @@ Status CacheValidationPass::Visit(std::shared_ptr<DatasetNode> node, bool *const
// If this node is created to be cached, set the flag.
is_cached_ = true;
}
if (node->IsLeaf() && node->IsMappable()) {
if (node->IsLeaf() && node->IsMappableDataSource()) {
is_mappable_ = true;
}
return Status::OK();

View File

@ -52,7 +52,7 @@ Status DeepCopyPass::Visit(std::shared_ptr<DatasetNode> node, bool *const modifi
new_node->SetNumWorkers(node->num_workers());
// This method below assumes a DFS walk and from the first child to the last child.
// Future: A more robust implementation that does not depend on the above assumption.
parent_->AppendChild(new_node);
RETURN_IF_NOT_OK(parent_->AppendChild(new_node));
// Then set this node to be a new parent to accept a copy of its next child
parent_ = new_node.get();

View File

@ -31,7 +31,7 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr<DatasetNode> nod
// Performs finder work for BuildVocabOp that has special rules about epoch control injection
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, bool *const modified) {
// The injection is at the child of the root node
injection_point_ = node;
injection_point_ = node->Children()[0];
num_epochs_ = node->num_epochs();
return Status::OK();
}
@ -53,7 +53,7 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildSentenceVocabN
Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) {
// Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here.
// Move the injection point to the child of this node.
injection_point_ = node;
injection_point_ = node->Children()[0];
return Status::OK();
}
@ -71,12 +71,11 @@ Status EpochCtrlPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *cons
// The first injection logic is to check if we should inject the epoch control op as the root node.
// Do not inject the op if the number of epochs is 1.
std::shared_ptr<DatasetNode> parent = finder.injection_point();
std::shared_ptr<DatasetNode> node = finder.injection_point();
int32_t num_epochs = finder.num_epochs();
if (num_epochs != 1 && parent != nullptr) {
CHECK_FAIL_RETURN_UNEXPECTED(parent->Children().size() == 1, "EpochCtrl must be injected on only one child.");
if (num_epochs != 1 && node != nullptr) {
auto epoch_ctrl_node = std::make_shared<EpochCtrlNode>(num_epochs);
RETURN_IF_NOT_OK(parent->InsertBelow(epoch_ctrl_node));
RETURN_IF_NOT_OK(node->InsertAbove(epoch_ctrl_node));
}
MS_LOG(INFO) << "Pre pass: Injection pass complete.";
return Status::OK();

View File

@ -27,7 +27,7 @@ Status InputValidationPass::Visit(std::shared_ptr<DatasetNode> node, bool *const
RETURN_IF_NOT_OK(node->ValidateParams());
// A data source node must be a leaf node
if ((node->IsMappable() || node->IsNonMappable()) && !node->IsLeaf()) {
if ((node->IsMappableDataSource() || node->IsNonMappableDataSource()) && !node->IsLeaf()) {
std::string err_msg = node->Name() + " is a data source and must be a leaf node.";
RETURN_STATUS_UNEXPECTED(err_msg);
}

View File

@ -14,41 +14,15 @@
* limitations under the License.
*/
#include <vector>
#include <algorithm>
#include "minddata/dataset/engine/opt/pre/node_removal_pass.h"
#include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
#include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
#include "minddata/dataset/engine/ir/datasetops/skip_node.h"
#include "minddata/dataset/engine/ir/datasetops/take_node.h"
namespace mindspore {
namespace dataset {
NodeRemovalPass::RemovalNodes::RemovalNodes() : is_caching_(false) {}
// Identifies the subtree below this node as a cached descendant tree.
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<DatasetNode> node, bool *const modified) {
*modified = false;
MS_LOG(INFO) << "Node removal pass: Operation with cache found, identified descendant tree.";
if (node->IsCached()) {
is_caching_ = true;
}
return Status::OK();
}
// Resets the tracking of the cache within the tree
Status NodeRemovalPass::RemovalNodes::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) {
*modified = false;
MS_LOG(INFO) << "Removal pass: Descendant walk is complete.";
if (is_caching_ && node->IsLeaf()) {
// Mark this leaf node to indicate it is a descendant of an operator with cache.
// This is currently used in non-mappable dataset (leaf) nodes to not add a ShuffleOp in DatasetNode::Build().
node->HasCacheAbove();
}
is_caching_ = false;
return Status::OK();
}
NodeRemovalPass::RemovalNodes::RemovalNodes() {}
// Perform RepeatNode removal check.
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) {
@ -59,12 +33,6 @@ Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<RepeatNode> node, bo
return Status::OK();
}
// Perform ShuffleNode removal check.
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<ShuffleNode> node, bool *const modified) {
*modified = false;
return Status::OK();
}
// Perform SkipNode removal check.
Status NodeRemovalPass::RemovalNodes::Visit(std::shared_ptr<SkipNode> node, bool *const modified) {
*modified = false;
@ -95,7 +63,7 @@ Status NodeRemovalPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *co
// Then, execute the removal of any nodes that were set up for removal
for (auto node : removal_nodes->nodes_to_remove()) {
RETURN_IF_NOT_OK(node->Remove());
RETURN_IF_NOT_OK(node->Drop());
}
MS_LOG(INFO) << "Pre pass: node removal pass complete.";
return Status::OK();

View File

@ -31,7 +31,7 @@ class DatasetOp;
/// nodes should be removed, and then removes them.
class NodeRemovalPass : public IRTreePass {
/// \class RemovalNodes
/// \brief This is a NodePass who's job is to identify which nodes should be removed.
/// \brief This is a NodePass whose job is to identify which nodes should be removed.
/// It works in conjunction with the removal_pass.
class RemovalNodes : public IRNodePass {
public:
@ -42,30 +42,12 @@ class NodeRemovalPass : public IRTreePass {
/// \brief Destructor
~RemovalNodes() = default;
/// \brief Identifies the subtree below this node as a cached descendant tree.
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
/// \return Status The status code returned
Status Visit(std::shared_ptr<DatasetNode> node, bool *const modified) override;
/// \brief Resets the tracking of the cache within the tree
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
/// \return Status The status code returned
Status VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) override;
/// \brief Perform RepeatNode removal check
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
/// \return Status The status code returned
Status Visit(std::shared_ptr<RepeatNode> node, bool *const modified) override;
/// \brief Perform ShuffleNode removal check
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
/// \return Status The status code returned
Status Visit(std::shared_ptr<ShuffleNode> node, bool *const modified) override;
/// \brief Perform SkipNode removal check
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
@ -83,7 +65,6 @@ class NodeRemovalPass : public IRTreePass {
std::vector<std::shared_ptr<DatasetNode>> nodes_to_remove() { return nodes_to_remove_; }
private:
bool is_caching_;
std::vector<std::shared_ptr<DatasetNode>> nodes_to_remove_;
};

View File

@ -187,8 +187,10 @@ Status TreeAdapter::Compile(std::shared_ptr<DatasetNode> input_ir, int32_t num_e
tree_state_ = kCompileStateOptimized;
MS_LOG(INFO) << "Plan after optimization:" << '\n' << *root_ir << '\n';
// Remember the root node
root_ir_ = root_ir;
RETURN_IF_NOT_OK(Build(root_ir, num_epochs));
RETURN_IF_NOT_OK(Build(root_ir_, num_epochs));
tree_state_ = kCompileStateReady;
return Status::OK();

View File

@ -46,6 +46,9 @@ class TreeAdapter {
// the Execution tree.
Status Compile(std::shared_ptr<DatasetNode> root_ir, int32_t num_epochs = -1);
// Return the root node of the IR after cloned from the parsed IR tree
std::shared_ptr<DatasetNode> RootIRNode() const { return root_ir_; }
// This is the main method TreeConsumer uses to interact with TreeAdapter
// 1. GetNext will Launch() the ExeTree on its first call by iterator (tree is already prepared)
// 2. GetNext will return empty row when eoe/eof is obtained
@ -87,6 +90,7 @@ class TreeAdapter {
std::unique_ptr<DataBuffer> cur_db_;
std::unordered_map<std::string, int32_t> column_name_map_;
std::shared_ptr<DatasetNode> root_ir_;
std::unique_ptr<ExecutionTree> tree_; // current connector capacity of root op, used for profiling
bool optimize_; // Flag to enable optional optimization pass
std::shared_ptr<DatasetIteratorTracing> tracing_; // trace profiling data

View File

@ -131,13 +131,14 @@ SET(DE_UT_SRCS
to_float16_op_test.cc
tokenizer_op_test.cc
treap_test.cc
tree_modifying_function_test.cc
trucate_pair_test.cc
type_cast_op_test.cc
weighted_random_sampler_test.cc
zip_op_test.cc
)
if (ENABLE_PYTHON)
if(ENABLE_PYTHON)
set(DE_UT_SRCS
${DE_UT_SRCS}
filter_op_test.cc
@ -145,13 +146,18 @@ if (ENABLE_PYTHON)
voc_op_test.cc
sentence_piece_vocab_op_test.cc
)
endif ()
endif()
add_executable(de_ut_tests ${DE_UT_SRCS})
set_target_properties(de_ut_tests PROPERTIES INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/../lib64")
target_link_libraries(de_ut_tests PRIVATE _c_dataengine pybind11::embed ${GTEST_LIBRARY} ${SECUREC_LIBRARY} ${SLOG_LIBRARY})
target_link_libraries(de_ut_tests PRIVATE
_c_dataengine pybind11::embed
${GTEST_LIBRARY}
${SECUREC_LIBRARY}
${SLOG_LIBRARY}
)
gtest_discover_tests(de_ut_tests WORKING_DIRECTORY ${Project_DIR}/tests/dataset)

View File

@ -0,0 +1,567 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include <string>
#include "common/common.h"
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
#include "minddata/dataset/engine/ir/datasetops/skip_node.h"
#include "minddata/dataset/engine/ir/datasetops/take_node.h"
#include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
#include "minddata/dataset/include/datasets.h"
using namespace mindspore::dataset;
class MindDataTestTreeModifying : public UT::DatasetOpTesting {
public:
MindDataTestTreeModifying() = default;
};
TEST_F(MindDataTestTreeModifying, AppendChild) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-AppendChild";
/*
* Input tree:
* ds4
* / \
* ds3 ds2
* |
* ds1
*
* ds4->AppendChild(ds6) yields this tree
*
* _ ds4 _
* / | \
* ds3 ds2 ds6
* |
* ds1
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds3 = ds1->Take(10);
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
Status rc;
std::shared_ptr<DatasetNode> root = ds4->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
// You can inspect the plan by sending *ir_tree->RootIRNode() to std::cout
std::shared_ptr<DatasetNode> node_to_insert = ds6->IRNode();
rc = ds4_node->AppendChild(node_to_insert);
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE( ds4_node->Children()[2] == node_to_insert);
EXPECT_TRUE(node_to_insert->Parent() == ds4_node.get());
}
TEST_F(MindDataTestTreeModifying, InsertChildAt01) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertChildAt01";
/*
* Input tree:
* ds4
* / \
* ds3 ds2
* | |
* ds1 ds5
*
* Case 1: ds4->InsertChildAt(1, ds6) yields this tree
*
* _ ds4 _
* / | \
* ds3 ds6 ds2
* | |
* ds1 ds5
*
* Case 2: ds4->InsertChildAt(0, ds6) yields this tree
*
* _ ds4 _
* / | \
* ds6 ds3 ds2
* | |
* ds1 ds5
*
* Case 3: ds4->InsertChildAt(2, ds6) yields this tree
*
* _ ds4 _
* / | \
* ds3 ds2 ds6
* | |
* ds1 ds5
*
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds3 = ds1->Take(10);
std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ds5->Repeat(4);
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
Status rc;
std::shared_ptr<DatasetNode> root = ds4->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
// Case 1:
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<Dataset> ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<DatasetNode> ds6_to_insert = ds6->IRNode();
std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
rc = ds4_node->InsertChildAt(1, ds6_to_insert);
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE( ds4_node->Children()[1] == ds6_to_insert);
EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
EXPECT_TRUE( ds4_node->Children()[2] == ds2_node);
// Case 2:
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
ds6_to_insert = ds6->IRNode();
std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
rc = ds4_node->InsertChildAt(0, ds6_to_insert);
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE( ds4_node->Children()[0] == ds6_to_insert);
EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
EXPECT_TRUE( ds4_node->Children()[1] == ds3_node);
// Case 3:
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
ds6_to_insert = ds6->IRNode();
rc = ds4_node->InsertChildAt(2, ds6_to_insert);
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE( ds4_node->Children()[2] == ds6_to_insert);
EXPECT_TRUE(ds6_to_insert->Parent() == ds4_node.get());
}
TEST_F(MindDataTestTreeModifying, InsertChildAt04) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertChildAt04";
/*
* Input tree:
* ds4
* / \
* ds3 ds2
* | |
* ds1 ds5
*
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds3 = ds1->Take(10);
std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ds5->Repeat(4);
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
Status rc;
std::shared_ptr<DatasetNode> root = ds4->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
// Case 4: ds4->InsertChildAt(3, ds6) raises an error
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<Dataset> ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<DatasetNode> ds6_to_insert = ds6->IRNode();
std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
rc = ds4_node->InsertChildAt(3, ds6_to_insert);
EXPECT_NE(rc, Status::OK());
EXPECT_TRUE( ds4_node->Children()[0] == ds3_node);
EXPECT_TRUE( ds4_node->Children()[1] == ds2_node);
// Case 5: ds4->InsertChildAt(-1, ds6) raises an error
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
ds6 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
ds6_to_insert = ds6->IRNode();
ds3_node = ds4_node->Children()[0];
ds2_node = ds4_node->Children()[1];
rc = ds4_node->InsertChildAt(-1, ds6_to_insert);
EXPECT_NE(rc, Status::OK());
EXPECT_TRUE( ds4_node->Children()[0] == ds3_node);
EXPECT_TRUE( ds4_node->Children()[1] == ds2_node);
}
TEST_F(MindDataTestTreeModifying, InsertAbove01) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertAbove01";
/*
* Insert the input <node> above this node
* Input tree:
* ds4
* / \
* ds3 ds2
* |
* ds1
*
* Case 1: If we want to insert a new node ds5 between ds4 and ds3, use
* ds3->InsertAbove(ds5)
*
* ds4
* / \
* ds5 ds2
* |
* ds3
* |
* ds1
*
* Case 2: Likewise, ds2->InsertAbove(ds6) yields
*
* ds4
* / \
* ds3 ds6
* | |
* ds1 ds2
*
* Case 3: We can insert a new node between ds3 and ds1 by ds1->InsertAbove(ds7)
*
* ds4
* / \
* ds3 ds2
* |
* ds7
* |
* ds1
*
*/
// Case 1
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds3 = ds1->Take(10);
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
Status rc;
std::shared_ptr<DatasetNode> root = ds4->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
std::shared_ptr<SkipNode> ds5_to_insert = std::make_shared<SkipNode>(nullptr, 1);
rc = ds3_node->InsertAbove(ds5_to_insert);
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE(ds5_to_insert->Children()[0] == ds3_node);
EXPECT_TRUE( ds3_node->Parent() == ds5_to_insert.get());
EXPECT_TRUE( ds4_node->Children()[0] == ds5_to_insert);
EXPECT_TRUE( ds5_to_insert->Parent() == ds4_node.get());
}
TEST_F(MindDataTestTreeModifying, InsertAbove02) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertAbove02";
// Case 2
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds3 = ds1->Take(10);
std::shared_ptr<Dataset> ds4 = ds2 + ds3; // ds2 is the second child and ds3 is the first child!!!
Status rc;
std::shared_ptr<DatasetNode> root = ds4->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
std::shared_ptr<TakeNode> ds6_to_insert = std::make_shared<TakeNode>(nullptr, 12);
rc = ds2_node->InsertAbove(ds6_to_insert);
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE(ds6_to_insert->Children()[0] == ds2_node);
EXPECT_TRUE( ds2_node->Parent() == ds6_to_insert.get());
EXPECT_TRUE( ds4_node->Children()[1] == ds6_to_insert);
EXPECT_TRUE( ds6_to_insert->Parent() == ds4_node.get());
}
TEST_F(MindDataTestTreeModifying, InsertAbove03) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-InsertAbove03";
// Case 3
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds3 = ds1->Take(10);
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
Status rc;
std::shared_ptr<DatasetNode> root = ds4->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds4_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
std::shared_ptr<DatasetNode> ds1_node = ds3_node->Children()[0];
std::shared_ptr<RepeatNode> ds7_to_insert = std::make_shared<RepeatNode>(nullptr, 3);
rc = ds1_node->InsertAbove(ds7_to_insert);
EXPECT_TRUE(ds7_to_insert->Children()[0] == ds1_node);
EXPECT_TRUE( ds1_node->Parent() == ds7_to_insert.get());
EXPECT_TRUE( ds3_node->Children()[0] == ds7_to_insert);
EXPECT_TRUE( ds7_to_insert->Parent() == ds3_node.get());
}
TEST_F(MindDataTestTreeModifying, Drop01) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop01";
/*
* Drop() detaches this node from the tree it is in. Calling Drop() from a standalone node is a no-op.
*
* Input tree:
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* Case 1: When the node has no child and no sibling, Drop() detaches the node from its tree.
*
* ds7->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* / \
* ds3 ds2
*
* Case 2: When the node has one child and no sibling, Drop() detaches the node from its tree and the node's child
* becomes its parent's child.
*
* ds8->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / | \
* ds7 ds5 ds4 ds1
* / \
* ds3 ds2
*
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds8 = ds7->Take(20);
std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
std::shared_ptr<Dataset> ds6 = ds4->Take(13);
std::shared_ptr<Dataset> ds10 = ds6 + ds9;
Status rc;
std::shared_ptr<DatasetNode> root = ds10->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
// Case 1
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds9_node = ds10_node->Children()[0];
std::shared_ptr<DatasetNode> ds8_node = ds9_node->Children()[0];
std::shared_ptr<DatasetNode> ds7_node = ds8_node->Children()[0];
rc = ds7_node->Drop();
EXPECT_EQ(rc, Status::OK());
// ds8 becomes a childless node
EXPECT_TRUE(ds8_node->Children().empty());
EXPECT_TRUE(ds7_node->Parent() == nullptr);
EXPECT_TRUE(ds7_node->Children().empty());
// Case 2
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
ds9_node = ds10_node->Children()[0];
ds8_node = ds9_node->Children()[0];
ds7_node = ds8_node->Children()[0];
rc = ds8_node->Drop();
EXPECT_EQ(rc, Status::OK());
// ds7 becomes a child of ds9
EXPECT_TRUE(ds9_node->Children()[0] == ds7_node);
EXPECT_TRUE(ds7_node->Parent() == ds9_node.get());
EXPECT_TRUE(ds8_node->Parent() == nullptr);
EXPECT_TRUE(ds8_node->Children().empty());
}
TEST_F(MindDataTestTreeModifying, Drop03) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop03";
/* Case 3: When the node has more than one child and no sibling, Drop() detaches the node from its tree and the node's
* children become its parent's children.
*
* When the input tree is
* ds10
* / \
* ds9 ds6
* | |
* ds8 ds4
* | / \
* ds7 ds3 ds2
*
*
* ds4->Drop() will raise an error because we cannot add the children of an n-ary operator (ds4) to a unary operator
* (ds6).
*
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds8 = ds7->Take(20);
std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
std::shared_ptr<Dataset> ds6 = ds4->Take(13);
std::shared_ptr<Dataset> ds10 = ds6 + ds9;
Status rc;
std::shared_ptr<DatasetNode> root = ds10->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds6_node = ds10_node->Children()[1];
std::shared_ptr<DatasetNode> ds4_node = ds6_node->Children()[0];
std::shared_ptr<DatasetNode> ds3_node = ds4_node->Children()[0];
std::shared_ptr<DatasetNode> ds2_node = ds4_node->Children()[1];
rc = ds4_node->Drop();
EXPECT_NE(rc, Status::OK());
}
TEST_F(MindDataTestTreeModifying, Drop04) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop04";
/* Case 4: When the node has no child but has siblings, Drop() detaches the node from its tree and its siblings will be
* squeezed left.
*
* Input tree:
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* ds5->Drop() yields the tree below:
*
* ds10
* / \
* ds9 ds6
* | / \
* ds8 ds4 ds1
* | / \
* ds7 ds3 ds2
*
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds8 = ds7->Take(20);
std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds6 = ds1->Concat({ds5, ds4}); // ds1 is put after (ds5, ds4)!!!
std::shared_ptr<Dataset> ds10 = ds6 + ds9;
Status rc;
std::shared_ptr<DatasetNode> root = ds10->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds6_node = ds10_node->Children()[1];
std::shared_ptr<DatasetNode> ds5_node = ds6_node->Children()[0];
std::shared_ptr<DatasetNode> ds4_node = ds6_node->Children()[1];
EXPECT_TRUE(ds5_node->IsDataSource());
EXPECT_TRUE(ds6_node->IsNaryOperator());
rc = ds5_node->Drop();
EXPECT_EQ(rc, Status::OK());
EXPECT_TRUE(ds6_node->Children().size() == 2);
EXPECT_TRUE(ds6_node->Children()[0] == ds4_node);
EXPECT_TRUE(ds4_node->Parent() == ds6_node.get());
EXPECT_TRUE(ds5_node->Parent() == nullptr);
EXPECT_TRUE(ds5_node->Children().empty());
}
TEST_F(MindDataTestTreeModifying, Drop05) {
MS_LOG(INFO) << "Doing MindDataTestTreeModifying-Drop05";
/*
* Case 5: When the node has more than one child and more than one sibling, Drop() will raise an error.
* If we want to drop ds4 from the input tree, ds4->Drop() will not work. We will have to do it
* with a combination of Drop(), InsertChildAt()
*
* Input tree:
* ds10
* / \
* ds9 ds6
* | / | \
* ds8 ds5 ds4 ds1
* | / \
* ds7 ds3 ds2
*
* If we want to form this tree below:
*
* ds10
* / \
* ds9 ds6_____
* | / | | \
* ds8 ds5 ds3 ds2 ds1
* |
* ds7
*
*/
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds7 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds8 = ds7->Take(20);
std::shared_ptr<Dataset> ds9 = ds8->Skip(1);
std::shared_ptr<Dataset> ds3 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds4 = ds2->Concat({ds3}); // ds2 is the second child and ds3 is the first child!!!
std::shared_ptr<Dataset> ds5 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, SequentialSampler(0, 11));
std::shared_ptr<Dataset> ds6 = ds1->Concat({ds5, ds4}); // ds1 is put after (ds5, ds4)!!!
std::shared_ptr<Dataset> ds10 = ds6 + ds9;
Status rc;
std::shared_ptr<DatasetNode> root = ds10->IRNode();
auto ir_tree = std::make_shared<TreeAdapter>();
rc = ir_tree->Compile(root); // Compile adds a new RootNode to the top of the tree
EXPECT_EQ(rc, Status::OK());
// Descend two levels as Compile adds the root node and the epochctrl node on top of ds4
std::shared_ptr<DatasetNode> ds10_node = ir_tree->RootIRNode()->Children()[0]->Children()[0];
std::shared_ptr<DatasetNode> ds6_node = ds10_node->Children()[1];
std::shared_ptr<DatasetNode> ds4_node = ds6_node->Children()[1];
rc = ds4_node->Drop();
EXPECT_NE(rc, Status::OK());
}