forked from mindspore-Ecosystem/mindspore
commit
f333b03d4d
|
@ -302,10 +302,10 @@ void GenKernelIoExecInfoMap(const NotNull<KernelGraphPtr> &kernel_graph,
|
|||
}
|
||||
}
|
||||
|
||||
void AscendStreamAssign::InsertEventsForInputs(const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &kernel,
|
||||
const NodeIoExecInfoPtr &io_exec_info,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_send,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_recv) {
|
||||
void AscendStreamAssign::InsertEventsForInputs(
|
||||
const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &kernel, const NodeIoExecInfoPtr &io_exec_info,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_send,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_recv) const {
|
||||
auto process_stream_id = AnfAlgo::GetStreamId(kernel);
|
||||
auto input_exec_info_list = io_exec_info->inputs;
|
||||
mindspore::HashMap<uint32_t, NodeExecInfoPtr> stream_max_exec_node_map;
|
||||
|
@ -333,10 +333,10 @@ void AscendStreamAssign::InsertEventsForInputs(const NotNull<KernelGraphPtr> &ke
|
|||
}
|
||||
}
|
||||
|
||||
void AscendStreamAssign::InsertEventsForOutputs(const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &kernel,
|
||||
const NodeIoExecInfoPtr &io_exec_info,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_send,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_recv) {
|
||||
void AscendStreamAssign::InsertEventsForOutputs(
|
||||
const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &kernel, const NodeIoExecInfoPtr &io_exec_info,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_send,
|
||||
mindspore::HashMap<AnfNodePtr, std::vector<CNodePtr>> *kernel_recv) const {
|
||||
auto process_stream_id = AnfAlgo::GetStreamId(kernel);
|
||||
auto output_exec_info_list = io_exec_info->outputs;
|
||||
mindspore::HashMap<uint32_t, NodeExecInfoPtr> stream_min_exec_node_map;
|
||||
|
@ -731,7 +731,8 @@ CNodePtr AscendStreamAssign::GetCNodesNeededMoved(vector<CNodePtr> *moved_backwa
|
|||
}
|
||||
|
||||
CNodePtr AscendStreamAssign::GetTargetOutputNode(const vector<CNodePtr> &moved_backward_cnodes,
|
||||
const CNodePtr first_node, const NotNull<KernelGraphPtr> &graph_ptr) {
|
||||
const CNodePtr first_node,
|
||||
const NotNull<KernelGraphPtr> &graph_ptr) const {
|
||||
auto cnode_ptr_list = graph_ptr->execution_order();
|
||||
if (moved_backward_cnodes.empty() || !first_node) {
|
||||
return nullptr;
|
||||
|
@ -770,7 +771,7 @@ CNodePtr AscendStreamAssign::GetTargetOutputNode(const vector<CNodePtr> &moved_b
|
|||
return first_output_node_ptr;
|
||||
}
|
||||
|
||||
bool AscendStreamAssign::FinetuneSubgraphExecOrder(vector<CNodePtr> *cnodes) {
|
||||
bool AscendStreamAssign::FinetuneSubgraphExecOrder(vector<CNodePtr> *cnodes) const {
|
||||
MS_EXCEPTION_IF_NULL(cnodes);
|
||||
auto hcom_pos = find_if(cnodes->begin(), cnodes->end(), [](const CNodePtr &node_ptr) -> bool {
|
||||
return common::AnfAlgo::GetCNodeName(node_ptr) == "AllReduce";
|
||||
|
@ -1005,7 +1006,7 @@ void AscendStreamAssign::ClassifyNodeByGroupAndGraph(const std::vector<CNodePtr>
|
|||
}
|
||||
}
|
||||
|
||||
std::set<uint32_t> AscendStreamAssign::AssignNodeStreamInOrder(const std::vector<CNodePtr> node_list) {
|
||||
std::set<uint32_t> AscendStreamAssign::AssignNodeStreamInOrder(const std::vector<CNodePtr> node_list) const {
|
||||
AscendStreamMng &resource_manager = AscendStreamMng::GetInstance();
|
||||
auto cur_stream_id = resource_manager.ApplyNewStream();
|
||||
std::map<uint32_t, uint32_t> stream_task_map;
|
||||
|
@ -1584,7 +1585,7 @@ void AscendStreamAssign::InsertRecvForNotLoopSink(const NotNull<KernelGraphPtr>
|
|||
}
|
||||
}
|
||||
|
||||
void AscendStreamAssign::GraphLoopSync(const NotNull<KernelGraphPtr> &root_graph, uint32_t graph_id) {
|
||||
void AscendStreamAssign::GraphLoopSync(const NotNull<KernelGraphPtr> &root_graph, uint32_t graph_id) const {
|
||||
if (ExistStreamSendAfterLastHcomNode(root_graph, graph_id)) {
|
||||
return;
|
||||
}
|
||||
|
@ -1733,7 +1734,7 @@ void AscendStreamAssign::InsertEventHcomDependCommonBak(const NotNull<KernelGrap
|
|||
}
|
||||
|
||||
vector<CNodePtr> AscendStreamAssign::GetLastInputCnode(const NotNull<KernelGraphPtr> &graph_ptr,
|
||||
const CNodePtr &cur_cnode_ptr) {
|
||||
const CNodePtr &cur_cnode_ptr) const {
|
||||
auto group_name = GetHcomGroup(cur_cnode_ptr);
|
||||
auto input_cnodes = GetInputKernels(cur_cnode_ptr);
|
||||
if (input_cnodes.empty()) {
|
||||
|
@ -1894,7 +1895,8 @@ std::vector<std::pair<uint32_t, vector<size_t>>> AscendStreamAssign::GetStreamID
|
|||
}
|
||||
|
||||
void AscendStreamAssign::InsertEventHcomDependHcomAtSameGroup(
|
||||
const NotNull<KernelGraphPtr> &graph_ptr, std::pair<std::string, std::map<uint32_t, std::set<uint32_t>>> group_item) {
|
||||
const NotNull<KernelGraphPtr> &graph_ptr,
|
||||
std::pair<std::string, std::map<uint32_t, std::set<uint32_t>>> group_item) const {
|
||||
for (const auto &graph_item : std::as_const(group_item.second)) {
|
||||
auto stream_indices = GetStreamIDHcomMap(graph_ptr->execution_order(), group_item.first, graph_item.first);
|
||||
constexpr size_t kStreamMax = 2;
|
||||
|
|
|
@ -87,7 +87,7 @@ class AscendStreamAssign {
|
|||
~AscendStreamAssign() = default;
|
||||
|
||||
void AssignAllNodesStream(const NotNull<KernelGraphPtr> &graph_ptr);
|
||||
std::set<uint32_t> AssignNodeStreamInOrder(const std::vector<CNodePtr> node_list);
|
||||
std::set<uint32_t> AssignNodeStreamInOrder(const std::vector<CNodePtr> node_list) const;
|
||||
void ClassifyNodeByKernel(const NotNull<KernelGraphPtr> &graph_ptr, std::vector<CNodePtr> *common_list,
|
||||
std::vector<CNodePtr> *hcom_list, std::vector<CNodePtr> *independent_list,
|
||||
std::vector<CNodePtr> *comm_sub_graph_list) const;
|
||||
|
@ -124,8 +124,9 @@ class AscendStreamAssign {
|
|||
void InsertEventBetweenHcom(const NotNull<KernelGraphPtr> &graph_ptr,
|
||||
const std::vector<std::pair<uint32_t, vector<size_t>>> &hcom_index) const;
|
||||
void InsertEventForCallCommSubGraph(const NotNull<KernelGraphPtr> &graph_ptr) const;
|
||||
void InsertEventHcomDependHcomAtSameGroup(const NotNull<KernelGraphPtr> &graph_ptr,
|
||||
std::pair<std::string, std::map<uint32_t, std::set<uint32_t>>> group_item);
|
||||
void InsertEventHcomDependHcomAtSameGroup(
|
||||
const NotNull<KernelGraphPtr> &graph_ptr,
|
||||
std::pair<std::string, std::map<uint32_t, std::set<uint32_t>>> group_item) const;
|
||||
void InsertRecvForLoopSink(const NotNull<KernelGraphPtr> &root_graph, std::vector<CNodePtr> *cnodes,
|
||||
uint32_t cur_event_id, uint32_t graph_id) const;
|
||||
void InsertRecvForNotLoopSink(const NotNull<KernelGraphPtr> &root_graph, std::vector<CNodePtr> *cnodes,
|
||||
|
@ -134,7 +135,7 @@ class AscendStreamAssign {
|
|||
const std::string group, size_t graph_id) const;
|
||||
|
||||
void AdjustAtomicAddrCleanOrder(const NotNull<KernelGraphPtr> &graph_ptr) const;
|
||||
vector<CNodePtr> GetLastInputCnode(const NotNull<KernelGraphPtr> &graph_ptr, const CNodePtr &cur_cnode_ptr);
|
||||
vector<CNodePtr> GetLastInputCnode(const NotNull<KernelGraphPtr> &graph_ptr, const CNodePtr &cur_cnode_ptr) const;
|
||||
vector<CNodePtr> GetIndependentNodesNeedsInsertActive(const std::vector<CNodePtr> exe_orders,
|
||||
const uint32_t graph_id) const;
|
||||
bool IsSatisfiedHcom(const std::vector<std::pair<uint32_t, vector<size_t>>> &hcom_index, const CNodePtr &node_ptr,
|
||||
|
@ -148,8 +149,8 @@ class AscendStreamAssign {
|
|||
CNodePtr GetCNodesNeededMoved(vector<CNodePtr> *moved_backward_cnodes, vector<CNodePtr> *moved_forward_cnodes,
|
||||
const vector<CNodePtr> &last_grad_and_status, const NotNull<KernelGraphPtr> &graph_ptr);
|
||||
CNodePtr GetTargetOutputNode(const vector<CNodePtr> &moved_backward_cnodes, const CNodePtr first_node,
|
||||
const NotNull<KernelGraphPtr> &graph_ptr);
|
||||
bool FinetuneSubgraphExecOrder(vector<CNodePtr> *cnodes);
|
||||
const NotNull<KernelGraphPtr> &graph_ptr) const;
|
||||
bool FinetuneSubgraphExecOrder(vector<CNodePtr> *cnodes) const;
|
||||
void TrailingTimeOptimizationByReorder(const NotNull<KernelGraphPtr> &graph_ptr);
|
||||
|
||||
uint32_t GetMaxIndexTarget(const NotNull<KernelGraphPtr> &graph_ptr);
|
||||
|
@ -185,7 +186,7 @@ class AscendStreamAssign {
|
|||
|
||||
bool ExistStreamSendAfterLastHcomNode(const NotNull<KernelGraphPtr> &graph_ptr, uint32_t graph_id) const;
|
||||
void GetAllGraphID(const NotNull<KernelGraphPtr> &graph_ptr, std::vector<uint32_t> *graphs_id);
|
||||
void GraphLoopSync(const NotNull<KernelGraphPtr> &root_graph, uint32_t graph_id);
|
||||
void GraphLoopSync(const NotNull<KernelGraphPtr> &root_graph, uint32_t graph_id) const;
|
||||
|
||||
void InsertEventForMicroBatchIndependent(const NotNull<KernelGraphPtr> &graph_ptr) const;
|
||||
|
||||
|
@ -231,11 +232,11 @@ class AscendStreamAssign {
|
|||
|
||||
void InsertEventsForInputs(const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &kernel,
|
||||
const NodeIoExecInfoPtr &io_exec_info, HashMap<AnfNodePtr, vector<CNodePtr>> *kernel_send,
|
||||
HashMap<AnfNodePtr, vector<CNodePtr>> *kernel_recv);
|
||||
HashMap<AnfNodePtr, vector<CNodePtr>> *kernel_recv) const;
|
||||
|
||||
void InsertEventsForOutputs(const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &kernel,
|
||||
const NodeIoExecInfoPtr &io_exec_info, HashMap<AnfNodePtr, vector<CNodePtr>> *kernel_send,
|
||||
HashMap<AnfNodePtr, vector<CNodePtr>> *kernel_recv);
|
||||
HashMap<AnfNodePtr, vector<CNodePtr>> *kernel_recv) const;
|
||||
|
||||
void InsertEvents(const NotNull<KernelGraphPtr> &kernel_graph, const CNodePtr &parallel_cnode,
|
||||
const AnfNodePtr &node_before_send, HashMap<mindspore::AnfNodePtr, vector<CNodePtr>> *kernel_send,
|
||||
|
|
Loading…
Reference in New Issue