From d8e72af6583da4cae3834b9436b8f878d6a1de11 Mon Sep 17 00:00:00 2001 From: kswang Date: Tue, 20 Oct 2020 17:10:28 +0800 Subject: [PATCH] optimize find input need lock tensor --- .../ccsrc/backend/session/ascend_session.cc | 2 ++ mindspore/ccsrc/backend/session/executor.cc | 2 +- .../ccsrc/backend/session/gpu_session.cc | 6 +---- .../ccsrc/backend/session/kernel_graph.cc | 19 ++++++++++++++++ .../ccsrc/backend/session/kernel_graph.h | 14 ++++++++---- .../ccsrc/backend/session/session_basic.cc | 22 ++++++------------- .../ccsrc/backend/session/session_basic.h | 2 +- 7 files changed, 41 insertions(+), 26 deletions(-) diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index 09bd1134c1f..e23e1ce3dae 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -187,6 +187,8 @@ GraphId AscendSession::CompileGraphImpl(NotNull func_graph) { MemoryAlloc(root_graph.get()); // generate and load task into device Load(root_graph); + root_graph->SetInputNodes(); + root_graph->SetOptimizerFlag(); DumpAllGraphs(all_graphs); // return the root_graph id to backend auto graph_id = root_graph->graph_id(); diff --git a/mindspore/ccsrc/backend/session/executor.cc b/mindspore/ccsrc/backend/session/executor.cc index 06dbd6c0e25..dd5841a6239 100644 --- a/mindspore/ccsrc/backend/session/executor.cc +++ b/mindspore/ccsrc/backend/session/executor.cc @@ -271,7 +271,7 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id, task->session_ = session; task->graph_id_ = graph_id; task->input_tensors_ = inputs; - task->input_need_lock_tensors_ = session->GetNeedLockInputTensors(graph_id, inputs); + task->input_need_lock_tensors_ = session->GetInputNeedLockTensors(graph_id, inputs); for (auto &tensor : inputs) { if (tensor->NeedWait()) { if (tensor->IsGraphOutput()) { diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 56a39a357a5..7cfd612b38f 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -169,11 +169,7 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const { std::vector inputs(inputs_const); MS_EXCEPTION_IF_NULL(kernel_graph); - std::vector input_nodes; - for (const auto &input_node : kernel_graph->inputs()) { - auto params = AnfAlgo::GetAllOutput(input_node); - std::copy(params.begin(), params.end(), std::back_inserter(input_nodes)); - } + auto &input_nodes = kernel_graph->input_nodes(); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); if (inputs.size() != input_nodes.size()) { diff --git a/mindspore/ccsrc/backend/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc index 5d55821c974..415403f9d74 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.cc +++ b/mindspore/ccsrc/backend/session/kernel_graph.cc @@ -1212,6 +1212,25 @@ void KernelGraph::UpdateGraphDynamicAttr() { is_dynamic_shape_ = false; } +void KernelGraph::SetInputNodes() { + input_nodes_.clear(); + for (const auto &input_node : inputs()) { + auto params = AnfAlgo::GetAllOutput(input_node); + std::copy(params.begin(), params.end(), std::back_inserter(input_nodes_)); + } +} + +void KernelGraph::SetOptimizerFlag() { + has_optimizer_ = false; + for (const auto &cnode : execution_order_) { + MS_EXCEPTION_IF_NULL(cnode); + if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) { + has_optimizer_ = true; + return; + } + } +} + std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); } KernelGraph::~KernelGraph() { diff --git a/mindspore/ccsrc/backend/session/kernel_graph.h b/mindspore/ccsrc/backend/session/kernel_graph.h index ef9b51ce45e..3d552cc2280 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.h +++ b/mindspore/ccsrc/backend/session/kernel_graph.h @@ -185,6 +185,10 @@ class KernelGraph : public FuncGraph { void UpdateGraphDynamicAttr(); bool is_dynamic_shape() const { return is_dynamic_shape_; } + void SetOptimizerFlag(); + void SetInputNodes(); + const std::vector &input_nodes() const { return input_nodes_; } + bool has_optimizer() const { return has_optimizer_; } private: // remove value node form graph @@ -234,9 +238,9 @@ class KernelGraph : public FuncGraph { std::unordered_map>> node_output_edges_; std::map> summary_nodes_; // graph needn't execute - bool executable_; + bool executable_{false}; // exist summary node in graph - bool summary_node_exist_; + bool summary_node_exist_{false}; // valid inputs std::vector valid_inputs_; @@ -251,7 +255,7 @@ class KernelGraph : public FuncGraph { CNodePtr start_label_; CNodePtr end_goto_; - bool null_output_; + bool null_output_{false}; std::unordered_map front_to_internal_outputs_map_; std::unordered_map>> internal_outputs_to_front_map_; std::unordered_map> internal_outputs_tensor_map_; @@ -260,7 +264,9 @@ class KernelGraph : public FuncGraph { std::set visited_nodes_; std::map edge_to_; std::stack loop_nodes_; - bool is_dynamic_shape_; + std::vector input_nodes_; + bool has_optimizer_{false}; + bool is_dynamic_shape_{false}; }; } // namespace session using KernelGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc index 7a9b44ca66d..0cb38680f10 100644 --- a/mindspore/ccsrc/backend/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -852,6 +852,8 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con graph->set_summary_node_exist(true); } opt::BackendCommonOptimization(graph); + graph->SetInputNodes(); + graph->SetOptimizerFlag(); return graph; } @@ -971,11 +973,8 @@ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_grap if (kernel_graph->input_ctrl_tensors()) { input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); } - std::vector input_nodes; - for (const auto &input_node : kernel_graph->inputs()) { - auto params = AnfAlgo::GetAllOutput(input_node); - std::copy(params.begin(), params.end(), std::back_inserter(input_nodes)); - } + auto &input_nodes = kernel_graph->input_nodes(); + if ((inputs.size() + input_ctrl_size) - 3 != input_nodes.size()) { MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() << ", input_ctrl_size:" << input_ctrl_size; @@ -1026,19 +1025,11 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr &kernel_grap } } -std::vector SessionBasic::GetNeedLockInputTensors(const GraphId &graph_id, +std::vector SessionBasic::GetInputNeedLockTensors(const GraphId &graph_id, const std::vector &inputs) { auto graph = GetGraph(graph_id); MS_EXCEPTION_IF_NULL(graph); - bool has_optimizer = false; - for (const auto &cnode : graph->execution_order()) { - MS_EXCEPTION_IF_NULL(cnode); - if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) { - has_optimizer = true; - break; - } - } - if (!has_optimizer) { + if (!graph->has_optimizer()) { return {}; } std::vector result; @@ -1339,6 +1330,7 @@ std::shared_ptr SessionBasic::ConstructSingleOpGraph(const OpRunInf graph->set_execution_order(exe_order); // set output CreateOutputNode(cnode, graph); + graph->SetInputNodes(); return graph; } diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h index 60719121264..9c89e3ea1a9 100644 --- a/mindspore/ccsrc/backend/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -98,7 +98,7 @@ class SessionBasic : public std::enable_shared_from_this { return true; } virtual void GetModelInputsInfo(uint32_t graph_id, std::vector *inputs) const {} - std::vector GetNeedLockInputTensors(const GraphId &graph_id, + std::vector GetInputNeedLockTensors(const GraphId &graph_id, const std::vector &inputs); #ifdef ENABLE_DEBUGGER // set debugger