forked from mindspore-Ecosystem/mindspore
Optimize finding the input tensors that need to be locked
This commit is contained in:
parent
de93d9bff1
commit
d8e72af658
|
@ -187,6 +187,8 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
|
||||||
MemoryAlloc(root_graph.get());
|
MemoryAlloc(root_graph.get());
|
||||||
// generate and load task into device
|
// generate and load task into device
|
||||||
Load(root_graph);
|
Load(root_graph);
|
||||||
|
root_graph->SetInputNodes();
|
||||||
|
root_graph->SetOptimizerFlag();
|
||||||
DumpAllGraphs(all_graphs);
|
DumpAllGraphs(all_graphs);
|
||||||
// return the root_graph id to backend
|
// return the root_graph id to backend
|
||||||
auto graph_id = root_graph->graph_id();
|
auto graph_id = root_graph->graph_id();
|
||||||
|
|
|
@ -271,7 +271,7 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
|
||||||
task->session_ = session;
|
task->session_ = session;
|
||||||
task->graph_id_ = graph_id;
|
task->graph_id_ = graph_id;
|
||||||
task->input_tensors_ = inputs;
|
task->input_tensors_ = inputs;
|
||||||
task->input_need_lock_tensors_ = session->GetNeedLockInputTensors(graph_id, inputs);
|
task->input_need_lock_tensors_ = session->GetInputNeedLockTensors(graph_id, inputs);
|
||||||
for (auto &tensor : inputs) {
|
for (auto &tensor : inputs) {
|
||||||
if (tensor->NeedWait()) {
|
if (tensor->NeedWait()) {
|
||||||
if (tensor->IsGraphOutput()) {
|
if (tensor->IsGraphOutput()) {
|
||||||
|
|
|
@ -169,11 +169,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||||
const std::vector<tensor::TensorPtr> &inputs_const) const {
|
const std::vector<tensor::TensorPtr> &inputs_const) const {
|
||||||
std::vector<tensor::TensorPtr> inputs(inputs_const);
|
std::vector<tensor::TensorPtr> inputs(inputs_const);
|
||||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||||
std::vector<AnfNodePtr> input_nodes;
|
auto &input_nodes = kernel_graph->input_nodes();
|
||||||
for (const auto &input_node : kernel_graph->inputs()) {
|
|
||||||
auto params = AnfAlgo::GetAllOutput(input_node);
|
|
||||||
std::copy(params.begin(), params.end(), std::back_inserter(input_nodes));
|
|
||||||
}
|
|
||||||
auto ms_context = MsContext::GetInstance();
|
auto ms_context = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(ms_context);
|
MS_EXCEPTION_IF_NULL(ms_context);
|
||||||
if (inputs.size() != input_nodes.size()) {
|
if (inputs.size() != input_nodes.size()) {
|
||||||
|
|
|
@ -1212,6 +1212,25 @@ void KernelGraph::UpdateGraphDynamicAttr() {
|
||||||
is_dynamic_shape_ = false;
|
is_dynamic_shape_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void KernelGraph::SetInputNodes() {
|
||||||
|
input_nodes_.clear();
|
||||||
|
for (const auto &input_node : inputs()) {
|
||||||
|
auto params = AnfAlgo::GetAllOutput(input_node);
|
||||||
|
std::copy(params.begin(), params.end(), std::back_inserter(input_nodes_));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void KernelGraph::SetOptimizerFlag() {
|
||||||
|
has_optimizer_ = false;
|
||||||
|
for (const auto &cnode : execution_order_) {
|
||||||
|
MS_EXCEPTION_IF_NULL(cnode);
|
||||||
|
if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) {
|
||||||
|
has_optimizer_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); }
|
std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); }
|
||||||
|
|
||||||
KernelGraph::~KernelGraph() {
|
KernelGraph::~KernelGraph() {
|
||||||
|
|
|
@ -185,6 +185,10 @@ class KernelGraph : public FuncGraph {
|
||||||
|
|
||||||
void UpdateGraphDynamicAttr();
|
void UpdateGraphDynamicAttr();
|
||||||
bool is_dynamic_shape() const { return is_dynamic_shape_; }
|
bool is_dynamic_shape() const { return is_dynamic_shape_; }
|
||||||
|
void SetOptimizerFlag();
|
||||||
|
void SetInputNodes();
|
||||||
|
const std::vector<AnfNodePtr> &input_nodes() const { return input_nodes_; }
|
||||||
|
bool has_optimizer() const { return has_optimizer_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// remove value node from graph
|
// remove value node from graph
|
||||||
|
@ -234,9 +238,9 @@ class KernelGraph : public FuncGraph {
|
||||||
std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_;
|
std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_;
|
||||||
std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_;
|
std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_;
|
||||||
// graph needn't execute
|
// graph needn't execute
|
||||||
bool executable_;
|
bool executable_{false};
|
||||||
// whether a summary node exists in the graph
|
// whether a summary node exists in the graph
|
||||||
bool summary_node_exist_;
|
bool summary_node_exist_{false};
|
||||||
// valid inputs
|
// valid inputs
|
||||||
std::vector<bool> valid_inputs_;
|
std::vector<bool> valid_inputs_;
|
||||||
|
|
||||||
|
@ -251,7 +255,7 @@ class KernelGraph : public FuncGraph {
|
||||||
|
|
||||||
CNodePtr start_label_;
|
CNodePtr start_label_;
|
||||||
CNodePtr end_goto_;
|
CNodePtr end_goto_;
|
||||||
bool null_output_;
|
bool null_output_{false};
|
||||||
std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
|
std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
|
||||||
std::unordered_map<AnfNodePtr, std::unordered_map<int, std::pair<AnfNodePtr, bool>>> internal_outputs_to_front_map_;
|
std::unordered_map<AnfNodePtr, std::unordered_map<int, std::pair<AnfNodePtr, bool>>> internal_outputs_to_front_map_;
|
||||||
std::unordered_map<AnfNodePtr, std::unordered_map<int, tensor::TensorPtr>> internal_outputs_tensor_map_;
|
std::unordered_map<AnfNodePtr, std::unordered_map<int, tensor::TensorPtr>> internal_outputs_tensor_map_;
|
||||||
|
@ -260,7 +264,9 @@ class KernelGraph : public FuncGraph {
|
||||||
std::set<AnfNodePtr> visited_nodes_;
|
std::set<AnfNodePtr> visited_nodes_;
|
||||||
std::map<AnfNodePtr, AnfNodePtr> edge_to_;
|
std::map<AnfNodePtr, AnfNodePtr> edge_to_;
|
||||||
std::stack<AnfNodePtr> loop_nodes_;
|
std::stack<AnfNodePtr> loop_nodes_;
|
||||||
bool is_dynamic_shape_;
|
std::vector<AnfNodePtr> input_nodes_;
|
||||||
|
bool has_optimizer_{false};
|
||||||
|
bool is_dynamic_shape_{false};
|
||||||
};
|
};
|
||||||
} // namespace session
|
} // namespace session
|
||||||
using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
|
using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
|
||||||
|
|
|
@ -852,6 +852,8 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
|
||||||
graph->set_summary_node_exist(true);
|
graph->set_summary_node_exist(true);
|
||||||
}
|
}
|
||||||
opt::BackendCommonOptimization(graph);
|
opt::BackendCommonOptimization(graph);
|
||||||
|
graph->SetInputNodes();
|
||||||
|
graph->SetOptimizerFlag();
|
||||||
return graph;
|
return graph;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -971,11 +973,8 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
|
||||||
if (kernel_graph->input_ctrl_tensors()) {
|
if (kernel_graph->input_ctrl_tensors()) {
|
||||||
input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs);
|
input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs);
|
||||||
}
|
}
|
||||||
std::vector<AnfNodePtr> input_nodes;
|
auto &input_nodes = kernel_graph->input_nodes();
|
||||||
for (const auto &input_node : kernel_graph->inputs()) {
|
|
||||||
auto params = AnfAlgo::GetAllOutput(input_node);
|
|
||||||
std::copy(params.begin(), params.end(), std::back_inserter(input_nodes));
|
|
||||||
}
|
|
||||||
if ((inputs.size() + input_ctrl_size) - 3 != input_nodes.size()) {
|
if ((inputs.size() + input_ctrl_size) - 3 != input_nodes.size()) {
|
||||||
MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
|
MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
|
||||||
<< ", input_ctrl_size:" << input_ctrl_size;
|
<< ", input_ctrl_size:" << input_ctrl_size;
|
||||||
|
@ -1026,19 +1025,11 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_grap
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<tensor::TensorPtr> SessionBasic::GetNeedLockInputTensors(const GraphId &graph_id,
|
std::vector<tensor::TensorPtr> SessionBasic::GetInputNeedLockTensors(const GraphId &graph_id,
|
||||||
const std::vector<tensor::TensorPtr> &inputs) {
|
const std::vector<tensor::TensorPtr> &inputs) {
|
||||||
auto graph = GetGraph(graph_id);
|
auto graph = GetGraph(graph_id);
|
||||||
MS_EXCEPTION_IF_NULL(graph);
|
MS_EXCEPTION_IF_NULL(graph);
|
||||||
bool has_optimizer = false;
|
if (!graph->has_optimizer()) {
|
||||||
for (const auto &cnode : graph->execution_order()) {
|
|
||||||
MS_EXCEPTION_IF_NULL(cnode);
|
|
||||||
if (kOptOperatorSet.find(AnfAlgo::GetCNodeName(cnode)) != kOptOperatorSet.end()) {
|
|
||||||
has_optimizer = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!has_optimizer) {
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
std::vector<tensor::TensorPtr> result;
|
std::vector<tensor::TensorPtr> result;
|
||||||
|
@ -1339,6 +1330,7 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf
|
||||||
graph->set_execution_order(exe_order);
|
graph->set_execution_order(exe_order);
|
||||||
// set output
|
// set output
|
||||||
CreateOutputNode(cnode, graph);
|
CreateOutputNode(cnode, graph);
|
||||||
|
graph->SetInputNodes();
|
||||||
return graph;
|
return graph;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -98,7 +98,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {}
|
virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {}
|
||||||
std::vector<tensor::TensorPtr> GetNeedLockInputTensors(const GraphId &graph_id,
|
std::vector<tensor::TensorPtr> GetInputNeedLockTensors(const GraphId &graph_id,
|
||||||
const std::vector<tensor::TensorPtr> &inputs);
|
const std::vector<tensor::TensorPtr> &inputs);
|
||||||
#ifdef ENABLE_DEBUGGER
|
#ifdef ENABLE_DEBUGGER
|
||||||
// set debugger
|
// set debugger
|
||||||
|
|
Loading…
Reference in New Issue