forked from mindspore-Ecosystem/mindspore
backend decoupling
This commit is contained in:
parent
1edd58625f
commit
fcb0a7057c
|
@ -33,6 +33,8 @@
|
|||
"mindspore/mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc" "knownConditionTrueFalse"
|
||||
"mindspore/mindspore/ccsrc/backend/graph_compiler/backend.cc" "knownConditionTrueFalse"
|
||||
"mindspore/mindspore/ccsrc/backend/graph_compiler/backend.cc" "variableScope"
|
||||
"mindspore/mindspore/ccsrc/backend/graph_compiler/backend_base.cc" "knownConditionTrueFalse"
|
||||
"mindspore/mindspore/ccsrc/backend/graph_compiler/backend_base.cc" "variableScope"
|
||||
"mindspore/mindspore/core/ops/max_pool.cc" "zerodivcond"
|
||||
|
||||
# MindData
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2019-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -52,17 +52,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace compile {
|
||||
bool Backend::GetCond(const BaseRef &c, bool *value) {
|
||||
mindspore::ScopedLongRunning long_running;
|
||||
return BaseRefToBool(c, value);
|
||||
}
|
||||
bool Backend::GetIndex(const BaseRef &c, int64_t *value) { return BaseRefToInt(utils::cast<ValuePtr>(c), value); }
|
||||
|
||||
Backend::Backend(const std::string &name) : name_(name), is_multi_graph_sink_(false) {
|
||||
MS_LOG(DEBUG) << "Select backend:" << name;
|
||||
convert_fn_ = MsVmConvert;
|
||||
}
|
||||
|
||||
LinConvertResult MsBackend::MsConvert(const GraphSegmentPtr &segment, const std::string &target) {
|
||||
MS_LOG(DEBUG) << "MsConvert";
|
||||
MS_EXCEPTION_IF_NULL(segment);
|
||||
|
@ -154,142 +143,6 @@ std::vector<tensor::TensorPtr> GetTensorWithoutValueMask(const session::BackendO
|
|||
return tensors_without_value_node;
|
||||
}
|
||||
|
||||
void PushInputTensor(const BaseRef &arg, std::vector<tensor::TensorPtr> *inputs) {
|
||||
MS_EXCEPTION_IF_NULL(inputs);
|
||||
if (utils::isa<tensor::TensorPtr>(arg)) {
|
||||
auto value = utils::cast<tensor::TensorPtr>(arg);
|
||||
inputs->push_back(value);
|
||||
} else if (utils::isa<ValuePtr>(arg)) {
|
||||
auto value = utils::cast<ValuePtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
auto value_tuple = value->cast<ValueTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_tuple);
|
||||
auto tuple_value = value_tuple->value();
|
||||
(void)std::transform(tuple_value.begin(), tuple_value.end(), std::back_inserter(*inputs),
|
||||
[](const ValuePtr &v) { return v->cast<tensor::TensorPtr>(); });
|
||||
} else if (value->isa<Scalar>()) {
|
||||
tensor::TensorPtr scalar_tensor = ScalarToTensor(value->cast<ScalarPtr>());
|
||||
inputs->push_back(scalar_tensor);
|
||||
} else if (value->isa<Monad>()) {
|
||||
// If value is a monad, replace it with an unused tensor.
|
||||
inputs->push_back(std::make_shared<tensor::Tensor>(int64_t(0), kBool));
|
||||
} else {
|
||||
inputs->push_back(value->cast<tensor::TensorPtr>());
|
||||
}
|
||||
} else if (utils::isa<PyObjectRef>(arg)) {
|
||||
auto value = utils::cast<PyObjectRef>(arg).object_;
|
||||
inputs->push_back(py::cast<tensor::TensorPtr>(value));
|
||||
} else if (utils::isa<VectorRefPtr>(arg)) {
|
||||
const auto &args_new = utils::cast<VectorRef>(arg);
|
||||
for (const auto &v : args_new) {
|
||||
PushInputTensor(v, inputs);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Invalid input type.";
|
||||
}
|
||||
}
|
||||
|
||||
// Move these function to anonymous namespace
|
||||
void FlatValueTupleValue(const ValuePtrList &value, ValuePtrList *flatted_value) {
|
||||
MS_EXCEPTION_IF_NULL(flatted_value);
|
||||
for (auto value_element : value) {
|
||||
MS_EXCEPTION_IF_NULL(value_element);
|
||||
if (utils::isa<tensor::TensorPtr>(value_element)) {
|
||||
(void)flatted_value->emplace_back(value_element);
|
||||
} else if (utils::isa<ValueTuplePtr>(value_element)) {
|
||||
auto value_tuple_element = value_element->cast<ValueTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_tuple_element);
|
||||
FlatValueTupleValue(value_tuple_element->value(), flatted_value);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "The value input to FlatValueTupleValue should only contains Tensor and ValueTuple.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlattenValue(const BaseRef &arg, ValuePtrList *flatted_value) {
|
||||
MS_EXCEPTION_IF_NULL(flatted_value);
|
||||
if (utils::isa<ValueSequencePtr>(arg)) {
|
||||
auto value_sequence = utils::cast<ValueSequencePtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(value_sequence);
|
||||
auto sequence_value = value_sequence->value();
|
||||
for (auto &value : sequence_value) {
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<tensor::Tensor>()) {
|
||||
(void)flatted_value->emplace_back(value);
|
||||
} else {
|
||||
FlattenValue(value, flatted_value);
|
||||
}
|
||||
}
|
||||
} else if (utils::isa<ValueDictionaryPtr>(arg)) {
|
||||
auto value_dict = utils::cast<ValueDictionaryPtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(value_dict);
|
||||
auto dict_value = value_dict->value();
|
||||
for (auto &iter : dict_value) {
|
||||
auto value = iter.second;
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<tensor::Tensor>()) {
|
||||
(void)flatted_value->emplace_back(value);
|
||||
} else {
|
||||
FlattenValue(value, flatted_value);
|
||||
}
|
||||
}
|
||||
} else if (utils::isa<tensor::COOTensorPtr>(arg)) {
|
||||
auto coo_tensor = utils::cast<tensor::COOTensorPtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(coo_tensor);
|
||||
for (size_t i = 0; i < coo_tensor->GetTensorLength(); ++i) {
|
||||
(void)flatted_value->emplace_back(coo_tensor->GetTensorAt(i));
|
||||
}
|
||||
} else if (utils::isa<tensor::CSRTensorPtr>(arg)) {
|
||||
auto csr_tensor = utils::cast<tensor::CSRTensorPtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(csr_tensor);
|
||||
for (size_t i = 0; i < csr_tensor->GetTensorLength(); ++i) {
|
||||
(void)flatted_value->emplace_back(csr_tensor->GetTensorAt(i));
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "The value input to flatten should only contains be sequence or dictionary, but it is "
|
||||
<< arg.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the front_node related tensor in the input_tensor.
|
||||
void PushTensor(const VectorRef &args, const std::vector<AnfNodePtr> ¶meters, const AnfNodePtr &front_node,
|
||||
std::vector<tensor::TensorPtr> *input_tensors) {
|
||||
MS_EXCEPTION_IF_NULL(input_tensors);
|
||||
const auto &iter = std::find(parameters.begin(), parameters.end(), front_node);
|
||||
if (iter == parameters.end()) {
|
||||
(void)((*input_tensors).emplace_back(nullptr));
|
||||
return;
|
||||
}
|
||||
auto position = iter - parameters.begin();
|
||||
PushInputTensor(args[position], input_tensors);
|
||||
}
|
||||
|
||||
void PushTupleTensor(const VectorRef &args, const std::vector<AnfNodePtr> ¶meters, const AnfNodePtr &front_node,
|
||||
size_t index, std::vector<tensor::TensorPtr> *input_tensors) {
|
||||
MS_EXCEPTION_IF_NULL(input_tensors);
|
||||
const auto &iter = std::find(parameters.begin(), parameters.end(), front_node);
|
||||
const size_t position = iter - parameters.begin();
|
||||
// If the parameter is not found in the parameters of the root graph, it means that it is the input of the subgraph,
|
||||
// and there is no need to input a tensor.
|
||||
if (position >= args.size()) {
|
||||
MS_LOG(DEBUG) << "Position out of args range, position value is " << position << " and args size is " << args.size()
|
||||
<< ".";
|
||||
(void)input_tensors->emplace_back(nullptr);
|
||||
return;
|
||||
}
|
||||
ValuePtrList flatted_value_tuple_value;
|
||||
FlattenValue(args[position], &flatted_value_tuple_value);
|
||||
if (index >= flatted_value_tuple_value.size()) {
|
||||
MS_LOG(EXCEPTION) << "Index out of flatted_value_tuple_value range, index value is " << index
|
||||
<< " and flatted_value_tuple_value size is " << flatted_value_tuple_value.size() << ".";
|
||||
}
|
||||
auto input = flatted_value_tuple_value[index];
|
||||
MS_EXCEPTION_IF_NULL(input);
|
||||
auto tensor_input = input->cast<tensor::TensorPtr>();
|
||||
input_tensors->push_back(tensor_input);
|
||||
}
|
||||
|
||||
void UpdateOutputAbstract(const KernelGraphPtr &kernel_graph, const session::BackendOpRunInfoPtr &op_run_info) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
MS_EXCEPTION_IF_NULL(op_run_info);
|
||||
|
@ -302,35 +155,6 @@ void UpdateOutputAbstract(const KernelGraphPtr &kernel_graph, const session::Bac
|
|||
}
|
||||
}
|
||||
|
||||
TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index) {
|
||||
MS_EXCEPTION_IF_NULL(output_node);
|
||||
// Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync
|
||||
// when infer type is not equal to device type.
|
||||
auto type_id = common::AnfAlgo::GetOutputInferDataType(output_node, output_index);
|
||||
const auto &shape = common::AnfAlgo::GetOutputInferShape(output_node, output_index);
|
||||
auto tensor = std::make_shared<tensor::Tensor>(type_id, shape);
|
||||
tensor->set_padding_type(AnfAlgo::GetOutputReshapeType(output_node, output_index));
|
||||
|
||||
// Put device tensor into host tensor.
|
||||
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(output_node, output_index, false);
|
||||
MS_EXCEPTION_IF_NULL(device_tensor);
|
||||
device_tensor->SetNodeIndex(output_node, output_index);
|
||||
tensor->set_device_address(device_tensor);
|
||||
tensor->set_sync_status(kNeedSyncDeviceToHost);
|
||||
|
||||
// MindRT is disabled in the multi graphs scenario
|
||||
// Delete tensor->data_sync() when MindRT is enabled in all scenes.
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
|
||||
// If execution mode is Graph Mode in MsContext, the tensor will be the input of graph which will execute in Graph
|
||||
// Mode, if the graph contain no CNode after optimization, the tensor need sync to host.
|
||||
tensor->data_sync(false);
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
|
||||
device::DeviceAddressPtr CloneEmptyDeviceAddress(const device::DeviceAddressPtr &old_device_address,
|
||||
const DeviceContext *device_context) {
|
||||
MS_EXCEPTION_IF_NULL(old_device_address);
|
||||
|
@ -413,47 +237,6 @@ bool EnablePyNativeSyncRunning() {
|
|||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
return ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE);
|
||||
}
|
||||
|
||||
std::vector<std::vector<tensor::TensorPtr>> GetRunGraphInputs(const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args) {
|
||||
const auto &origin_parameters = graph_compiler_info.origin_parameters_order_;
|
||||
std::vector<std::vector<tensor::TensorPtr>> input_tensor_lists;
|
||||
for (const auto &kernel_graph : graph_compiler_info.graphs_) {
|
||||
std::vector<tensor::TensorPtr> input_tensors;
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
for (const auto &input_node : kernel_graph->input_nodes()) {
|
||||
auto element_pair = kernel_graph->GetElementInTupleBackendFrontIndexMap(input_node);
|
||||
if (element_pair.first) {
|
||||
PushTupleTensor(args, origin_parameters, element_pair.first, element_pair.second, &input_tensors);
|
||||
} else {
|
||||
const auto &front_node = kernel_graph->GetFrontAnfByBackendAnf(input_node);
|
||||
PushTensor(args, origin_parameters, front_node, &input_tensors);
|
||||
}
|
||||
}
|
||||
(void)input_tensor_lists.emplace_back(input_tensors);
|
||||
}
|
||||
|
||||
// Input tensors of the control node.
|
||||
std::vector<tensor::TensorPtr> input_tensors;
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_info.control_node_parser_);
|
||||
// Get inputs of control node which come from the host actor.
|
||||
const auto &control_node_parameters = graph_compiler_info.control_node_parser_->control_node_parameters();
|
||||
for (const auto ¶meter_with_index : control_node_parameters) {
|
||||
const auto ¶meter = parameter_with_index.first;
|
||||
MS_EXCEPTION_IF_NULL(parameter);
|
||||
const auto &abs = parameter->abstract();
|
||||
MS_EXCEPTION_IF_NULL(abs);
|
||||
if (abs->isa<abstract::AbstractTuple>()) {
|
||||
MS_LOG(DEBUG) << "Fetch input tensor for tuple parameter:" << parameter->DebugString() << " in control flow.";
|
||||
PushTupleTensor(args, origin_parameters, parameter, parameter_with_index.second, &input_tensors);
|
||||
} else {
|
||||
PushTensor(args, origin_parameters, parameter, &input_tensors);
|
||||
}
|
||||
}
|
||||
(void)input_tensor_lists.emplace_back(input_tensors);
|
||||
|
||||
return input_tensor_lists;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target) {
|
||||
|
@ -540,194 +323,6 @@ void MsBackend::SetDebugger() {
|
|||
}
|
||||
#endif
|
||||
|
||||
MindRTBackend::MindRTBackend(const std::string &backend_name, const std::string &device_name, uint32_t device_id)
|
||||
: Backend(backend_name), device_name_(device_name) {
|
||||
root_graph_ = nullptr;
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
|
||||
auto &cut_list = pynative_mode ? GetControlOps() : GetMsNonlinearOps();
|
||||
|
||||
graph_partition_ = std::make_shared<GraphPartition>(cut_list, backend_name);
|
||||
graph_compiler_ = std::make_shared<GraphCompiler>();
|
||||
|
||||
const auto &device_context =
|
||||
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name, device_id});
|
||||
device_context->Initialize();
|
||||
device_id_ = device_context->device_context_key().device_id_;
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
SetDebuggerInit();
|
||||
#endif
|
||||
runtime::GraphScheduler::GetInstance().Initialize();
|
||||
}
|
||||
|
||||
void MindRTBackend::ProcessNotSupportCnode(const FuncGraphPtr &func_graph,
|
||||
const mindspore::device::DeviceType &old_target,
|
||||
const mindspore::device::DeviceType &new_target) const {
|
||||
const auto &all_nodes = TopoSort(func_graph->return_node(), SuccDeeperSimple, AlwaysInclude);
|
||||
for (const auto &node : all_nodes) {
|
||||
if (!node->isa<CNode>()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
if (!common::AnfAlgo::HasNodeAttr(kAttrNotSupportOpForDevice, cnode)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto not_support_device = common::AnfAlgo::GetNodeAttr<std::string>(node, kAttrNotSupportOpForDevice);
|
||||
if (device::GetDeviceTypeByName(not_support_device) != old_target) {
|
||||
continue;
|
||||
}
|
||||
|
||||
common::AnfAlgo::SetNodeAttr(kAttrPrimitiveTarget, MakeValue(device::GetDeviceNameByType(new_target)), node);
|
||||
}
|
||||
}
|
||||
|
||||
const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_LOG(INFO) << "Status record: start compile function graph: " << func_graph->ToString();
|
||||
PROF_START(compile_func_graph);
|
||||
auto root_graph = WrapPrimitives(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(root_graph);
|
||||
root_graph_ = root_graph;
|
||||
// Register a summary callback function, which is called in the final stages of summary.
|
||||
graph_compiler_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback);
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
ms_execution_mode_ = context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE);
|
||||
real_execution_mode_ = ms_execution_mode_;
|
||||
func_graph->set_flag(kFlagPyNativeRunInGraph, real_execution_mode_ == kPynativeMode);
|
||||
|
||||
// Compile root graph.
|
||||
graph_id_to_device_context_.clear();
|
||||
func_graph_to_kernel_graph_ids_.clear();
|
||||
control_nodes_.clear();
|
||||
|
||||
const auto &device_context =
|
||||
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name_, device_id_});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
bool all_support = device_context->PartitionGraph(func_graph);
|
||||
if (all_support) {
|
||||
auto run_mode = device_context->GetRunMode(func_graph);
|
||||
if (run_mode == device::RunMode::kGraphMode && pynative::GraphAdapter::PyNativeEnableTaskSink(func_graph)) {
|
||||
auto graph_id = graph_compiler_->CompileWholeGraphForGraphRunMode(func_graph, device_context);
|
||||
graph_id_to_device_context_[graph_id] = device_context;
|
||||
} else {
|
||||
CompileSubGraph(func_graph, device::RunMode::kKernelMode);
|
||||
}
|
||||
} else {
|
||||
ProcessNotSupportCnode(func_graph, device_context->GetDeviceType(), mindspore::device::DeviceType::kCPU);
|
||||
CompileSubGraph(func_graph);
|
||||
}
|
||||
|
||||
// Construct the graph compiler info.
|
||||
auto graph_compiler_info = ConstructGraphCompilerInfo(root_graph);
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_info);
|
||||
if (real_execution_mode_ == kGraphMode &&
|
||||
((!graph_compiler_info->graphs_.empty()) || graph_compiler_info->control_nodes_.size() > 1)) {
|
||||
// Transform graph to actor DAG, and schedule the actor DAG.
|
||||
ParseControlNodes(*graph_compiler_info);
|
||||
const auto &actor_set = runtime::GraphScheduler::GetInstance().Transform(*graph_compiler_info);
|
||||
runtime::GraphScheduler::GetInstance().Schedule(actor_set);
|
||||
}
|
||||
const ActorInfo &actor_info = graph_compiler_info->name_;
|
||||
(void)actor_to_graph_compiler_info_.emplace(graph_compiler_info->name_, std::move(graph_compiler_info));
|
||||
PROF_END(compile_func_graph);
|
||||
|
||||
if (ms_execution_mode_ != real_execution_mode_) {
|
||||
context_ptr->set_param<int>(MS_CTX_EXECUTION_MODE, ms_execution_mode_);
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Status record: end compile function graph: " << func_graph->ToString()
|
||||
<< ", produce actor: " << actor_info;
|
||||
return actor_info;
|
||||
}
|
||||
|
||||
void MindRTBackend::CompileSubGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode) {
|
||||
auto root_graph = WrapPrimitives(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(root_graph);
|
||||
CompileGraph(root_graph, run_mode);
|
||||
|
||||
MS_EXCEPTION_IF_NULL(root_graph->manager());
|
||||
FuncGraphSet sub_graphs = root_graph->manager()->func_graphs();
|
||||
for (const auto &sub_graph : sub_graphs) {
|
||||
if (sub_graph != func_graph && sub_graph != nullptr) {
|
||||
CompileGraph(sub_graph, run_mode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackend::CompileGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode) {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(graph_partition_);
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
|
||||
bool contain_multi_target = false;
|
||||
// Split graph to segments.
|
||||
const auto &segments = graph_partition_->Partition(func_graph, &contain_multi_target);
|
||||
MS_LOG(INFO) << "Compile graph: " << func_graph->ToString() << ", Split segments size:" << segments.size();
|
||||
|
||||
// Foreach the segments to compile graph.
|
||||
for (const auto &segment : segments) {
|
||||
CompileGraph(segment, run_mode);
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackend::CompileGraph(const GraphSegmentPtr &segment, device::RunMode run_mode) {
|
||||
MS_EXCEPTION_IF_NULL(segment);
|
||||
// Compile the normal nodes, which doesn't contain the cut node.
|
||||
if (segment->nodes_.size() == 0) {
|
||||
MS_LOG(EXCEPTION) << "The segments size is 0.";
|
||||
}
|
||||
if (!segment->is_cut_) {
|
||||
MS_EXCEPTION_IF_NULL(segment->nodes_[0]);
|
||||
MS_LOG(INFO) << "Compile normal segment, the first node: " << segment->nodes_[0]->DebugString();
|
||||
|
||||
// Get the device context.
|
||||
const auto &cur_device_name = GetCNodeTarget(segment->nodes_[0]);
|
||||
const auto &device_context =
|
||||
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({cur_device_name, device_id_});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
device_context->Initialize();
|
||||
|
||||
// Transform nodes to inputs and outputs.
|
||||
FuncGraphPtr fg;
|
||||
AnfNodePtrList inputs;
|
||||
AnfNodePtrList outputs;
|
||||
std::tie(fg, inputs, outputs) = TransformSegmentToAnfGraph(segment->nodes_);
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
// Compile graph.
|
||||
auto graph_id =
|
||||
graph_compiler_->CompileGraph(segment, outputs, device_context, run_mode, real_execution_mode_ == kPynativeMode);
|
||||
|
||||
graph_id_to_device_context_[graph_id] = device_context;
|
||||
|
||||
const auto &func_graph = segment->nodes_[0]->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
if (func_graph_to_kernel_graph_ids_.find(func_graph) == func_graph_to_kernel_graph_ids_.end()) {
|
||||
(void)func_graph_to_kernel_graph_ids_[func_graph].emplace_back(std::vector<GraphId>{graph_id});
|
||||
} else {
|
||||
(void)func_graph_to_kernel_graph_ids_[func_graph].back().emplace_back(graph_id);
|
||||
}
|
||||
} else {
|
||||
// Compile the cut node.
|
||||
auto cut_node = segment->nodes_[0];
|
||||
MS_EXCEPTION_IF_NULL(cut_node);
|
||||
MS_LOG(INFO) << "Compile cut segment, the cut node: " << cut_node->DebugString();
|
||||
control_nodes_.push_back(cut_node);
|
||||
if (common::AnfAlgo::IsCallNode(cut_node) || common::AnfAlgo::CheckPrimitiveType(cut_node, prim::kPrimSwitch) ||
|
||||
common::AnfAlgo::CheckPrimitiveType(cut_node, prim::kPrimSwitchLayer)) {
|
||||
const auto &func_graph = cut_node->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
(void)func_graph_to_kernel_graph_ids_[func_graph].emplace_back(std::vector<GraphId>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void GetControlOpInput(const std::shared_ptr<GraphCompiler> &graph_compiler, const CNodePtr &front_cnode,
|
||||
const CNodePtr &backend_cnode, const std::map<KernelWithIndex, tensor::TensorPtr> &op_output_map,
|
||||
|
@ -878,101 +473,36 @@ void RunControlOperator(const std::shared_ptr<GraphCompiler> &graph_compiler, co
|
|||
}
|
||||
}
|
||||
|
||||
void TensorValueToVector(const ValuePtr &value, VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
auto value_tuple = value->cast<ValueTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_tuple);
|
||||
for (size_t i = 0; i < value_tuple->size(); ++i) {
|
||||
ValuePtr element = value_tuple->value()[i];
|
||||
MS_EXCEPTION_IF_NULL(element);
|
||||
if (element->isa<tensor::Tensor>()) {
|
||||
auto tensor = element->cast<tensor::TensorPtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
outputs->emplace_back(tensor);
|
||||
} else if (element->isa<ValueTuple>()) {
|
||||
VectorRef tuple;
|
||||
TensorValueToVector(element, &tuple);
|
||||
outputs->emplace_back(tuple);
|
||||
}
|
||||
}
|
||||
} else if (value->isa<tensor::Tensor>()) {
|
||||
auto tensor = value->cast<tensor::TensorPtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
outputs->emplace_back(tensor);
|
||||
}
|
||||
TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index) {
|
||||
MS_EXCEPTION_IF_NULL(output_node);
|
||||
// Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync
|
||||
// when infer type is not equal to device type.
|
||||
auto type_id = common::AnfAlgo::GetOutputInferDataType(output_node, output_index);
|
||||
const auto &shape = common::AnfAlgo::GetOutputInferShape(output_node, output_index);
|
||||
auto tensor = std::make_shared<tensor::Tensor>(type_id, shape);
|
||||
tensor->set_padding_type(AnfAlgo::GetOutputReshapeType(output_node, output_index));
|
||||
|
||||
// Put device tensor into host tensor.
|
||||
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(output_node, output_index, false);
|
||||
MS_EXCEPTION_IF_NULL(device_tensor);
|
||||
device_tensor->SetNodeIndex(output_node, output_index);
|
||||
tensor->set_device_address(device_tensor);
|
||||
tensor->set_sync_status(kNeedSyncDeviceToHost);
|
||||
|
||||
// MindRT is disabled in the multi graphs scenario
|
||||
// Delete tensor->data_sync() when MindRT is enabled in all scenes.
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
|
||||
// If execution mode is Graph Mode in MsContext, the tensor will be the input of graph which will execute in Graph
|
||||
// Mode, if the graph contain no CNode after optimization, the tensor need sync to host.
|
||||
tensor->data_sync(false);
|
||||
}
|
||||
|
||||
bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &graph_output, const VectorRef &args, VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(graph_output);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
if (graph_output->isa<ValueNode>()) {
|
||||
MS_LOG(INFO) << "Graph's output is a constant. No need to execute.";
|
||||
VectorRef output_tmp;
|
||||
ValuePtr value = GetValueNode(graph_output);
|
||||
TensorValueToVector(value, &output_tmp);
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
outputs->emplace_back(output_tmp);
|
||||
} else if (value->isa<tensor::Tensor>()) {
|
||||
*outputs = output_tmp;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Graph output is empty!";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (graph_output->isa<Parameter>()) {
|
||||
MS_LOG(INFO) << "Graph's output is a parameter. If all params are inputs, no need to execute.";
|
||||
// Find the right parameter as ret_val.
|
||||
auto func_graph = graph_output->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
auto params = func_graph->parameters();
|
||||
if (args.size() != params.size()) {
|
||||
MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to graph input size " << params.size();
|
||||
}
|
||||
|
||||
auto it = std::find(params.begin(), params.end(), graph_output);
|
||||
if (it == params.end()) {
|
||||
MS_EXCEPTION(UnknownError) << "When graph output is Parameter, it should be found in graph parameters";
|
||||
}
|
||||
size_t index = it - params.cbegin();
|
||||
if (index >= args.size()) {
|
||||
MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than args size " << args.size();
|
||||
}
|
||||
|
||||
outputs->emplace_back(args[index]);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return tensor;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void MindRTBackend::ConstructOutputs(runtime::ActorSet *actor_set, VectorRef *outputs, const FuncGraphPtr &root_graph) {
|
||||
bool need_contruct_output = !(distributed::recovery::RecoveryContext::GetInstance()->enable_recovery() &&
|
||||
distributed::recovery::RecoveryContext::GetInstance()->need_reset());
|
||||
bool is_embedding_cache_server = false;
|
||||
#ifdef WITH_BACKEND
|
||||
is_embedding_cache_server = ps::PSContext::instance()->cache_enable() && ps::PSContext::instance()->is_server();
|
||||
#endif
|
||||
if (need_contruct_output) {
|
||||
// Update device address for output node of graph.
|
||||
// Summary processing will use the output device address, so must be after the summary processing.
|
||||
if (!is_embedding_cache_server) {
|
||||
actor_set->output_actor_->UpdateOutputDeviceAddress();
|
||||
}
|
||||
|
||||
// Fetch outputs.
|
||||
MS_EXCEPTION_IF_NULL(actor_set->output_actor_);
|
||||
auto &output_tensors = actor_set->output_actor_->outputs();
|
||||
if (!output_tensors.empty()) {
|
||||
size_t output_position = 0;
|
||||
ConstructOutputs(root_graph->output(), output_tensors, &output_position, outputs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackend::RunGraphByActors(const ActorInfo &actor_info, const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args, VectorRef *outputs) {
|
||||
WaitTaskFinish();
|
||||
|
@ -1156,176 +686,6 @@ void MindRTBackend::RunGraphByCondition(const ActorInfo &actor_info, const Graph
|
|||
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
|
||||
}
|
||||
|
||||
void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(root_graph_);
|
||||
if (IsGraphOutputValueNodeOrParameter(root_graph_->output(), args, outputs)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto &context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (context_ptr->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
|
||||
MS_LOG(INFO) << "PrecompileOnly, stop run graph";
|
||||
return;
|
||||
}
|
||||
|
||||
// Open abstract_lock for dynamic_shape
|
||||
AnfUtils::OpenAbstractLock();
|
||||
|
||||
MS_LOG(INFO) << "Status record: start run actor: " << actor_info;
|
||||
// Fetch the graph compiler info.
|
||||
const auto &graph_iter = actor_to_graph_compiler_info_.find(actor_info);
|
||||
if (graph_iter == actor_to_graph_compiler_info_.end()) {
|
||||
MS_LOG(EXCEPTION) << "Can't find the graph compiler info.";
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(graph_iter->second);
|
||||
const auto &graph_compiler_info = *(graph_iter->second);
|
||||
// For pynative and graph mix execution.
|
||||
WaitTaskFinish();
|
||||
|
||||
// Run in the pynative mode.
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
// There will be more than one kernel graph in heterogeneous scenario in a ms function of PyNative Mode.
|
||||
if (real_execution_mode_ == kPynativeMode) {
|
||||
RunGraphByCondition(actor_info, graph_compiler_info, args, outputs);
|
||||
return;
|
||||
}
|
||||
|
||||
auto input_tensors = GetRunGraphInputs(graph_compiler_info, args);
|
||||
// Release python gil.
|
||||
mindspore::ScopedLongRunning long_running;
|
||||
// Run actor DAG.
|
||||
const auto &actor_set = runtime::GraphScheduler::GetInstance().Fetch(actor_info);
|
||||
MS_EXCEPTION_IF_NULL(actor_set);
|
||||
runtime::GraphScheduler::GetInstance().Run(actor_set, input_tensors);
|
||||
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
graph_compiler_->Summary(graph_compiler_info.graphs_);
|
||||
|
||||
ConstructOutputs(actor_set, outputs, root_graph_);
|
||||
|
||||
runtime::GraphScheduler::GetInstance().ClearActorData(actor_set);
|
||||
// Close abstract_lock for dynamic_shape
|
||||
AnfUtils::CloseAbstractLock();
|
||||
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
|
||||
}
|
||||
|
||||
BaseRef MindRTBackend::ConstructOutputByAbstract(const abstract::AbstractBasePtr &abstract,
|
||||
const std::vector<tensor::TensorPtr> &output_tensors,
|
||||
size_t *output_position) {
|
||||
MS_EXCEPTION_IF_NULL(abstract);
|
||||
MS_EXCEPTION_IF_NULL(output_position);
|
||||
|
||||
size_t outputs_num = common::AnfAlgo::GetOutputNumByAbstract(abstract);
|
||||
if (*output_position + outputs_num > output_tensors.size()) {
|
||||
MS_LOG(EXCEPTION) << "The output position is out of range: " << *output_position << " need:" << outputs_num
|
||||
<< " total:" << output_tensors.size();
|
||||
}
|
||||
VectorRef outputs;
|
||||
|
||||
if (!abstract->isa<abstract::AbstractTuple>()) {
|
||||
(*output_position)++;
|
||||
return output_tensors[(*output_position) - 1];
|
||||
}
|
||||
|
||||
auto tuple_abstract = abstract->cast<abstract::AbstractTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tuple_abstract);
|
||||
const auto &sub_abstracts = tuple_abstract->elements();
|
||||
for (const auto &sub_abstract : sub_abstracts) {
|
||||
MS_EXCEPTION_IF_NULL(sub_abstract);
|
||||
outputs.emplace_back(ConstructOutputByAbstract(sub_abstract, output_tensors, output_position));
|
||||
}
|
||||
return outputs;
|
||||
}
|
||||
|
||||
void MindRTBackend::ConstructOutputs(const AnfNodePtr &output_node,
|
||||
const std::vector<tensor::TensorPtr> &output_tensors, size_t *output_position,
|
||||
VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(output_node);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
MS_EXCEPTION_IF_NULL(output_position);
|
||||
const PrimitiveSet expand_prims{
|
||||
prim::kPrimMakeTuple,
|
||||
prim::kPrimMakeCSRTensor,
|
||||
prim::kPrimMakeCOOTensor,
|
||||
prim::kPrimMakeRowTensor,
|
||||
};
|
||||
// The MakeTuple/MakeSaprse node need expand and recurse.
|
||||
if (IsOneOfPrimitiveCNode(output_node, expand_prims)) {
|
||||
auto make_tuple = output_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(make_tuple);
|
||||
VectorRef make_tuple_output;
|
||||
for (size_t i = 1; i < make_tuple->inputs().size(); i++) {
|
||||
ConstructOutputs(make_tuple->input(i), output_tensors, output_position, &make_tuple_output);
|
||||
}
|
||||
outputs->emplace_back(std::move(make_tuple_output));
|
||||
return;
|
||||
}
|
||||
|
||||
// The depend node need get the real node.
|
||||
if (common::AnfAlgo::CheckPrimitiveType(output_node, prim::kPrimDepend)) {
|
||||
auto depend_node = output_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(depend_node);
|
||||
ConstructOutputs(depend_node->input(kRealInputIndexInDepend), output_tensors, output_position, outputs);
|
||||
return;
|
||||
}
|
||||
|
||||
auto outputs_num = common::AnfAlgo::GetOutputTensorNum(output_node);
|
||||
// The value node uses the value to be output, to avoid the host memory of value free due to value node destruction.
|
||||
if (output_node->isa<ValueNode>()) {
|
||||
auto value = output_node->cast<ValueNodePtr>()->value();
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
outputs->emplace_back(value);
|
||||
(*output_position) += CountValueNum(value->cast<ValueTuplePtr>());
|
||||
} else if (outputs_num != 0) {
|
||||
outputs->emplace_back(value);
|
||||
(*output_position) += outputs_num;
|
||||
}
|
||||
// The empty value node return the empty VectorRef.
|
||||
return;
|
||||
}
|
||||
|
||||
if (common::AnfAlgo::IsCallNode(output_node)) {
|
||||
auto abstract = output_node->abstract();
|
||||
MS_EXCEPTION_IF_NULL(abstract);
|
||||
outputs->emplace_back(ConstructOutputByAbstract(abstract, output_tensors, output_position));
|
||||
return;
|
||||
}
|
||||
|
||||
auto &output_abstract = output_node->abstract();
|
||||
MS_EXCEPTION_IF_NULL(output_abstract);
|
||||
// Wrap output to VectorRef if the output is tuple.
|
||||
if (output_abstract->isa<abstract::AbstractTuple>()) {
|
||||
VectorRef output_tuple;
|
||||
for (size_t i = 0; i < outputs_num; ++i) {
|
||||
if (*output_position >= output_tensors.size()) {
|
||||
MS_LOG(EXCEPTION) << "The output position is out of range: " << *output_position;
|
||||
}
|
||||
output_tuple.emplace_back(std::move(output_tensors[*output_position]));
|
||||
++(*output_position);
|
||||
}
|
||||
outputs->emplace_back(std::move(output_tuple));
|
||||
} else {
|
||||
for (size_t i = 0; i < outputs_num; ++i) {
|
||||
if (*output_position >= output_tensors.size()) {
|
||||
MS_LOG(EXCEPTION) << "The output position is out of range: " << *output_position;
|
||||
}
|
||||
outputs->emplace_back(std::move(output_tensors[*output_position]));
|
||||
++(*output_position);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
void MindRTBackend::SetDebuggerInit() {
|
||||
auto debugger_ = Debugger::GetInstance();
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
debugger_->Init(device_id_, ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET));
|
||||
}
|
||||
#endif
|
||||
|
||||
void MindRTBackend::WaitTaskFinish() const { runtime::OpExecutor::GetInstance().Wait(); }
|
||||
|
||||
void MindRTBackend::ClearOpExecutorResource() const { runtime::OpExecutor::GetInstance().Reset(); }
|
||||
|
@ -1340,54 +700,6 @@ void MindRTBackend::SyncStream() {
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<GraphCompilerInfo> MindRTBackend::ConstructGraphCompilerInfo(const FuncGraphPtr &root_graph) {
|
||||
MS_EXCEPTION_IF_NULL(root_graph);
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
|
||||
std::vector<KernelGraphPtr> graphs;
|
||||
std::vector<DeviceContext *> device_contexts;
|
||||
std::string name = "kernel_graph";
|
||||
size_t graph_index = 0;
|
||||
for (const auto &graph_id_to_context : graph_id_to_device_context_) {
|
||||
(void)graphs.emplace_back(graph_compiler_->Fetch(graph_id_to_context.first));
|
||||
(void)device_contexts.emplace_back(graph_id_to_context.second);
|
||||
if (graph_index == 0) {
|
||||
(void)name.append("_").append(std::to_string(graph_id_to_context.first));
|
||||
} else if (graph_index == graph_id_to_device_context_.size() - 1) {
|
||||
(void)name.append("-").append(std::to_string(graph_id_to_context.first));
|
||||
}
|
||||
++graph_index;
|
||||
}
|
||||
|
||||
auto parser = std::make_shared<ControlNodeParser>();
|
||||
|
||||
runtime::KernelMapPosition outputs_order;
|
||||
const auto &root_output =
|
||||
common::AnfAlgo::VisitKernelWithReturnType(root_graph->output(), 0, false, {prim::kPrimTupleGetItem}).first;
|
||||
size_t position = 0;
|
||||
auto outputs = common::AnfAlgo::GetAllOutputWithIndex(root_output);
|
||||
size_t outputs_num = outputs.size();
|
||||
for (const auto &output : outputs) {
|
||||
if (outputs_order.count(output) == 0) {
|
||||
outputs_order[output] = {position++};
|
||||
} else {
|
||||
(void)outputs_order[output].emplace_back(position++);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<int64_t> *> tensors_mask;
|
||||
std::vector<std::vector<tensor::TensorPtr> *> input_tensors;
|
||||
auto strategy = runtime::GraphExecutionStrategy::kPipeline;
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (context_ptr->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
|
||||
strategy = runtime::GraphExecutionStrategy::kPipelineWithExecutionOrder;
|
||||
}
|
||||
return std::make_shared<GraphCompilerInfo>(graphs, device_contexts, tensors_mask, input_tensors, control_nodes_,
|
||||
root_graph->parameters(), parser, outputs_order, outputs_num, name, false,
|
||||
strategy);
|
||||
}
|
||||
|
||||
void MindRTBackend::EraseSingleOpCache(const GraphInfo &graph_info) {
|
||||
pynative::OpCompiler::GetInstance().ClearOpCache(graph_info);
|
||||
}
|
||||
|
@ -1623,25 +935,5 @@ void MindRTBackend::UpdateOutput(const std::vector<session::KernelWithIndex> &ou
|
|||
outputs->emplace_back(output_tensor);
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackend::ParseControlNodes(const GraphCompilerInfo &graph_compile_info) {
|
||||
FuncGraphToKernelGraphGroup func_graph_to_kernel_graphs;
|
||||
for (const auto &func_graph_to_kernel_graph_ids : func_graph_to_kernel_graph_ids_) {
|
||||
const auto &func_graph = func_graph_to_kernel_graph_ids.first;
|
||||
for (const auto &sub_kernel_graphs_ids : func_graph_to_kernel_graph_ids.second) {
|
||||
std::vector<KernelGraphPtr> kernel_graphs;
|
||||
for (const auto &graph_id : sub_kernel_graphs_ids) {
|
||||
const auto &kernel_graph = graph_compiler_->Fetch(graph_id);
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
(void)kernel_graphs.emplace_back(kernel_graph);
|
||||
}
|
||||
(void)func_graph_to_kernel_graphs[func_graph].emplace_back(kernel_graphs);
|
||||
}
|
||||
}
|
||||
|
||||
graph_compile_info.control_node_parser_->Parse(control_nodes_, graph_compile_info.graphs_,
|
||||
graph_compile_info.device_contexts_, root_graph_,
|
||||
func_graph_to_kernel_graphs);
|
||||
}
|
||||
} // namespace compile
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019-2021 Huawei Technologies Co., Ltd
|
||||
* Copyright 2019-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -27,6 +27,7 @@
|
|||
#include "utils/hash_map.h"
|
||||
#include "include/common/utils/contract.h"
|
||||
#include "ir/anf.h"
|
||||
#include "backend/graph_compiler/backend_base.h"
|
||||
#include "backend/graph_compiler/segment_runner.h"
|
||||
#include "backend/graph_compiler/graph_partition.h"
|
||||
#include "backend/graph_compiler/vm.h"
|
||||
|
@ -39,43 +40,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace compile {
|
||||
using GraphOutputInfo = session::GraphOutputInfo;
|
||||
using DeviceContext = device::DeviceContext;
|
||||
using ActorInfo = runtime::ActorInfo;
|
||||
using GraphCompiler = runtime::GraphCompiler;
|
||||
using GraphCompilerInfo = runtime::GraphCompilerInfo;
|
||||
using ControlNodeParser = runtime::ControlNodeParser;
|
||||
using FuncGraphToKernelGraphGroup = runtime::FuncGraphToKernelGraphGroup;
|
||||
using ControlNodeParserPtr = runtime::ControlNodeParserPtr;
|
||||
using KernelWithIndex = session::KernelWithIndex;
|
||||
|
||||
enum SwitchCondStatus {
|
||||
kCondOk = 0,
|
||||
kCondAlreadyRun,
|
||||
};
|
||||
|
||||
class BACKEND_EXPORT Backend {
|
||||
public:
|
||||
explicit Backend(const std::string &name);
|
||||
|
||||
virtual ~Backend() = default;
|
||||
|
||||
LinkFuncType convert_fn() { return convert_fn_; }
|
||||
std::string name() { return name_; }
|
||||
virtual bool GetCond(const BaseRef &c, bool *value);
|
||||
virtual bool GetIndex(const BaseRef &c, int64_t *value);
|
||||
virtual GraphId CompileGraph(NotNull<FuncGraphPtr> fg) { return kInvalidGraphId; }
|
||||
virtual void SetDebugger() {}
|
||||
|
||||
bool is_multi_graph_sink() const { return is_multi_graph_sink_; }
|
||||
void set_is_multi_graph_sink(bool flag) { is_multi_graph_sink_ = flag; }
|
||||
|
||||
protected:
|
||||
std::string name_;
|
||||
LinkFuncType convert_fn_;
|
||||
bool is_multi_graph_sink_;
|
||||
};
|
||||
|
||||
class BACKEND_EXPORT MsBackend : public Backend {
|
||||
public:
|
||||
MsBackend(const std::string &name, const std::string &target, uint32_t device_id);
|
||||
|
@ -102,59 +66,30 @@ class BACKEND_EXPORT MsBackend : public Backend {
|
|||
mindspore::HashMap<GraphId, LinConvertResult> graph_id_map_;
|
||||
};
|
||||
|
||||
class BACKEND_EXPORT MindRTBackend : public Backend {
|
||||
class BACKEND_EXPORT MindRTBackend : public MindRTBackendBase {
|
||||
public:
|
||||
MindRTBackend(const std::string &backend_name, const std::string &device_name, uint32_t device_id);
|
||||
MindRTBackend(const std::string &backend_name, const std::string &device_name, uint32_t device_id)
|
||||
: MindRTBackendBase(backend_name, device_name, device_id) {}
|
||||
~MindRTBackend() override = default;
|
||||
|
||||
// The parameter root_graph is a root graph, and the root graph maybe contain multiple sub graphs, It will traverse
|
||||
// all sub graphs to call CompileGraph.
|
||||
const ActorInfo &CompileGraphs(const FuncGraphPtr &func_graph);
|
||||
|
||||
// Run Graph in the graph mode.
|
||||
void RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs);
|
||||
// Run single op in the PyNative mode.
|
||||
void RunOp(const session::BackendOpRunInfoPtr &op_run_info, VectorRef *outputs);
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
void SetDebuggerInit();
|
||||
#endif
|
||||
|
||||
// Execute all tasks in queue when lazy build is enabled in PyNative mode.
|
||||
void WaitTaskFinish() const;
|
||||
void WaitTaskFinish() const override;
|
||||
// Clear resource when python exit.
|
||||
void ClearOpExecutorResource() const;
|
||||
// Get the device target.
|
||||
std::string GetDeviceTarget() { return device_name_; }
|
||||
|
||||
// Sync default stream in PyNative mode.
|
||||
void SyncStream();
|
||||
|
||||
private:
|
||||
// The parameter func_graph is a graph, it can be either a root graph or a sub graph,
|
||||
// The result of graph compiler is stored in graph_id_to_device_context_ and control_nodes_.
|
||||
void CompileGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode);
|
||||
|
||||
// Compile the kernel graph by the segment which is from the function graph partition.
|
||||
void CompileGraph(const GraphSegmentPtr &segment, device::RunMode run_mode);
|
||||
|
||||
// CreateKernel, Transform and Schedule have not been finished when LazyBuild is enabled in PyNative mode.
|
||||
void CompileSingleOpGraph(const KernelGraphPtr &graph, const DeviceContext *device_context) const;
|
||||
|
||||
// Get saved OpBuildTask in OpExecutor and build all the kernels together in PyNative mode.
|
||||
void CompileSingleOpGraphs(const std::vector<std::shared_ptr<runtime::OpBuildTask>> &build_tasks);
|
||||
|
||||
void ConstructOutputs(runtime::ActorSet *actor_set, VectorRef *outputs, const FuncGraphPtr &root_graph);
|
||||
|
||||
// Restore the outputs tuple by the origin funcGraph output node and output tensors.
|
||||
void ConstructOutputs(const AnfNodePtr &output_node, const std::vector<tensor::TensorPtr> &output_tensors,
|
||||
size_t *output_position, VectorRef *outputs);
|
||||
// In the control flow, the output of the call node needs to be created by abstract.
|
||||
BaseRef ConstructOutputByAbstract(const abstract::AbstractBasePtr &abstract,
|
||||
const std::vector<tensor::TensorPtr> &output_tensors, size_t *output_position);
|
||||
// Construct the GraphCompilerInfo by the compilation results of graph, used in Graph mode.
|
||||
std::shared_ptr<GraphCompilerInfo> ConstructGraphCompilerInfo(const FuncGraphPtr &root_graph);
|
||||
|
||||
void ParseControlNodes(const GraphCompilerInfo &graph_compile_info);
|
||||
|
||||
// In PyNative mode, the size of single op cache list will be increasing, which lead to memory cost increasing,
|
||||
// so the latest single op cache should be erased when cache list size exceeds threshold value.
|
||||
void EraseSingleOpCache(const GraphInfo &graph_info);
|
||||
|
@ -171,48 +106,24 @@ class BACKEND_EXPORT MindRTBackend : public Backend {
|
|||
const session::BackendOpRunInfoPtr &op_run_info);
|
||||
|
||||
void RunGraphByCondition(const ActorInfo &actor_info, const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args, VectorRef *outputs);
|
||||
const VectorRef &args, VectorRef *outputs) override;
|
||||
// Split complete kernel graph to single op graph in PyNative back
|
||||
// propagation, then compile and run single op graph.
|
||||
void RunGraphBySingleOp(const GraphCompilerInfo &graph_compiler_info, const VectorRef &args, VectorRef *outputs);
|
||||
|
||||
void RunGraphByActors(const ActorInfo &actor_info, const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args, VectorRef *outputs);
|
||||
|
||||
void UpdateOutput(const std::vector<session::KernelWithIndex> &output_nodes, VectorRef *const outputs);
|
||||
|
||||
void ReleaseForwardOutput(const std::vector<TensorPtr> &input_tensors);
|
||||
|
||||
void OpRunCallback(const std::shared_ptr<runtime::OpTaskContext> &context);
|
||||
|
||||
// When compiling FuncGraph, it is divided according to the control nodes, and obtain the control nodes and several
|
||||
// node segments. Node segments will be compiled into kernelGraphs which are expressed as GraphId and bound to
|
||||
// the corresponding device_context.
|
||||
std::map<GraphId, DeviceContext *> graph_id_to_device_context_;
|
||||
// Funcgraph will be cut into multiple kernel graphs, and the map is used to save the correspondence.
|
||||
// The kernel graphs which not cut by control flow are placed in the same group.
|
||||
std::map<FuncGraphPtr, std::vector<std::vector<GraphId>>> func_graph_to_kernel_graph_ids_;
|
||||
std::map<GraphInfo, DeviceContext *> graph_info_to_device_context_;
|
||||
std::vector<AnfNodePtr> control_nodes_;
|
||||
|
||||
mindspore::HashMap<ActorInfo, std::shared_ptr<GraphCompilerInfo>> actor_to_graph_compiler_info_;
|
||||
|
||||
// Cache output tensor ref count of kernels for back propagation graph in PyNative mode.
|
||||
std::map<GraphId, std::map<KernelWithIndex, size_t>> cnode_ref_counts_;
|
||||
|
||||
// Cache forward op output value node tensor ref count of kernels for back propagation graph in PyNative mode.
|
||||
std::map<std::string, size_t> forward_op_output_tensor_id_;
|
||||
|
||||
FuncGraphPtr root_graph_;
|
||||
GraphPartitionPtr graph_partition_;
|
||||
std::shared_ptr<GraphCompiler> graph_compiler_;
|
||||
std::string device_name_;
|
||||
uint32_t device_id_;
|
||||
int ms_execution_mode_{kGraphMode};
|
||||
int real_execution_mode_{kGraphMode};
|
||||
void CompileSubGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode = device::RunMode::kUnknown);
|
||||
void ProcessNotSupportCnode(const FuncGraphPtr &func_graph, const device::DeviceType &old_target,
|
||||
const device::DeviceType &new_target) const;
|
||||
};
|
||||
using MindRTBackendPtr = std::shared_ptr<compile::MindRTBackend>;
|
||||
} // namespace compile
|
||||
|
|
|
@ -0,0 +1,753 @@
|
|||
/**
|
||||
* Copyright 2019-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/graph_compiler/backend_base.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "backend/graph_compiler/transform.h"
|
||||
#include "ir/anf.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "runtime/graph_scheduler/graph_compiler.h"
|
||||
#include "runtime/pynative/graph_adapter.h"
|
||||
#include "distributed/recovery/recovery_context.h"
|
||||
#include "include/common/utils/scoped_long_running.h"
|
||||
#include "include/common/utils/callbacks.h"
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
#include "debug/debugger/debugger.h"
|
||||
#endif
|
||||
#ifdef WITH_BACKEND
|
||||
#include "ps/ps_context.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace compile {
|
||||
bool Backend::GetCond(const BaseRef &c, bool *value) {
|
||||
mindspore::ScopedLongRunning long_running;
|
||||
return BaseRefToBool(c, value);
|
||||
}
|
||||
bool Backend::GetIndex(const BaseRef &c, int64_t *value) { return BaseRefToInt(utils::cast<ValuePtr>(c), value); }
|
||||
|
||||
Backend::Backend(const std::string &name) : name_(name), is_multi_graph_sink_(false) {
|
||||
MS_LOG(DEBUG) << "Select backend:" << name;
|
||||
convert_fn_ = MsVmConvert;
|
||||
}
|
||||
|
||||
void PushInputTensor(const BaseRef &arg, std::vector<tensor::TensorPtr> *inputs) {
|
||||
MS_EXCEPTION_IF_NULL(inputs);
|
||||
if (utils::isa<tensor::TensorPtr>(arg)) {
|
||||
auto value = utils::cast<tensor::TensorPtr>(arg);
|
||||
inputs->push_back(value);
|
||||
} else if (utils::isa<ValuePtr>(arg)) {
|
||||
auto value = utils::cast<ValuePtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
auto value_tuple = value->cast<ValueTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_tuple);
|
||||
auto tuple_value = value_tuple->value();
|
||||
(void)std::transform(tuple_value.begin(), tuple_value.end(), std::back_inserter(*inputs),
|
||||
[](const ValuePtr &v) { return v->cast<tensor::TensorPtr>(); });
|
||||
} else if (value->isa<Scalar>()) {
|
||||
tensor::TensorPtr scalar_tensor = ScalarToTensor(value->cast<ScalarPtr>());
|
||||
inputs->push_back(scalar_tensor);
|
||||
} else if (value->isa<Monad>()) {
|
||||
// If value is a monad, replace it with an unused tensor.
|
||||
inputs->push_back(std::make_shared<tensor::Tensor>(int64_t(0), kBool));
|
||||
} else {
|
||||
inputs->push_back(value->cast<tensor::TensorPtr>());
|
||||
}
|
||||
} else if (utils::isa<PyObjectRef>(arg)) {
|
||||
auto value = utils::cast<PyObjectRef>(arg).object_;
|
||||
inputs->push_back(py::cast<tensor::TensorPtr>(value));
|
||||
} else if (utils::isa<VectorRefPtr>(arg)) {
|
||||
const auto &args_new = utils::cast<VectorRef>(arg);
|
||||
for (const auto &v : args_new) {
|
||||
PushInputTensor(v, inputs);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Invalid input type.";
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Move these function to anonymous namespace
|
||||
void FlatValueTupleValue(const ValuePtrList &value, ValuePtrList *flatted_value) {
|
||||
MS_EXCEPTION_IF_NULL(flatted_value);
|
||||
for (auto value_element : value) {
|
||||
MS_EXCEPTION_IF_NULL(value_element);
|
||||
if (utils::isa<tensor::TensorPtr>(value_element)) {
|
||||
(void)flatted_value->emplace_back(value_element);
|
||||
} else if (utils::isa<ValueTuplePtr>(value_element)) {
|
||||
auto value_tuple_element = value_element->cast<ValueTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_tuple_element);
|
||||
FlatValueTupleValue(value_tuple_element->value(), flatted_value);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "The value input to FlatValueTupleValue should only contains Tensor and ValueTuple.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlattenValue(const BaseRef &arg, ValuePtrList *flatted_value) {
|
||||
MS_EXCEPTION_IF_NULL(flatted_value);
|
||||
if (utils::isa<ValueSequencePtr>(arg)) {
|
||||
auto value_sequence = utils::cast<ValueSequencePtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(value_sequence);
|
||||
auto sequence_value = value_sequence->value();
|
||||
for (auto &value : sequence_value) {
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<tensor::Tensor>()) {
|
||||
(void)flatted_value->emplace_back(value);
|
||||
} else {
|
||||
FlattenValue(value, flatted_value);
|
||||
}
|
||||
}
|
||||
} else if (utils::isa<ValueDictionaryPtr>(arg)) {
|
||||
auto value_dict = utils::cast<ValueDictionaryPtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(value_dict);
|
||||
auto dict_value = value_dict->value();
|
||||
for (auto &iter : dict_value) {
|
||||
auto value = iter.second;
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<tensor::Tensor>()) {
|
||||
(void)flatted_value->emplace_back(value);
|
||||
} else {
|
||||
FlattenValue(value, flatted_value);
|
||||
}
|
||||
}
|
||||
} else if (utils::isa<tensor::COOTensorPtr>(arg)) {
|
||||
auto coo_tensor = utils::cast<tensor::COOTensorPtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(coo_tensor);
|
||||
for (size_t i = 0; i < coo_tensor->GetTensorLength(); ++i) {
|
||||
(void)flatted_value->emplace_back(coo_tensor->GetTensorAt(i));
|
||||
}
|
||||
} else if (utils::isa<tensor::CSRTensorPtr>(arg)) {
|
||||
auto csr_tensor = utils::cast<tensor::CSRTensorPtr>(arg);
|
||||
MS_EXCEPTION_IF_NULL(csr_tensor);
|
||||
for (size_t i = 0; i < csr_tensor->GetTensorLength(); ++i) {
|
||||
(void)flatted_value->emplace_back(csr_tensor->GetTensorAt(i));
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "The value input to flatten should only contains be sequence or dictionary, but it is "
|
||||
<< arg.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the front_node related tensor in the input_tensor.
|
||||
void PushTensor(const VectorRef &args, const std::vector<AnfNodePtr> ¶meters, const AnfNodePtr &front_node,
|
||||
std::vector<tensor::TensorPtr> *input_tensors) {
|
||||
MS_EXCEPTION_IF_NULL(input_tensors);
|
||||
const auto &iter = std::find(parameters.begin(), parameters.end(), front_node);
|
||||
if (iter == parameters.end()) {
|
||||
(void)((*input_tensors).emplace_back(nullptr));
|
||||
return;
|
||||
}
|
||||
auto position = iter - parameters.begin();
|
||||
PushInputTensor(args[position], input_tensors);
|
||||
}
|
||||
|
||||
void PushTupleTensor(const VectorRef &args, const std::vector<AnfNodePtr> ¶meters, const AnfNodePtr &front_node,
|
||||
size_t index, std::vector<tensor::TensorPtr> *input_tensors) {
|
||||
MS_EXCEPTION_IF_NULL(input_tensors);
|
||||
const auto &iter = std::find(parameters.begin(), parameters.end(), front_node);
|
||||
const size_t position = iter - parameters.begin();
|
||||
// If the parameter is not found in the parameters of the root graph, it means that it is the input of the subgraph,
|
||||
// and there is no need to input a tensor.
|
||||
if (position >= args.size()) {
|
||||
MS_LOG(DEBUG) << "Position out of args range, position value is " << position << " and args size is " << args.size()
|
||||
<< ".";
|
||||
(void)input_tensors->emplace_back(nullptr);
|
||||
return;
|
||||
}
|
||||
ValuePtrList flatted_value_tuple_value;
|
||||
FlattenValue(args[position], &flatted_value_tuple_value);
|
||||
if (index >= flatted_value_tuple_value.size()) {
|
||||
MS_LOG(EXCEPTION) << "Index out of flatted_value_tuple_value range, index value is " << index
|
||||
<< " and flatted_value_tuple_value size is " << flatted_value_tuple_value.size() << ".";
|
||||
}
|
||||
auto input = flatted_value_tuple_value[index];
|
||||
MS_EXCEPTION_IF_NULL(input);
|
||||
auto tensor_input = input->cast<tensor::TensorPtr>();
|
||||
input_tensors->push_back(tensor_input);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
std::vector<std::vector<tensor::TensorPtr>> GetRunGraphInputs(const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args) {
|
||||
const auto &origin_parameters = graph_compiler_info.origin_parameters_order_;
|
||||
std::vector<std::vector<tensor::TensorPtr>> input_tensor_lists;
|
||||
for (const auto &kernel_graph : graph_compiler_info.graphs_) {
|
||||
std::vector<tensor::TensorPtr> input_tensors;
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
for (const auto &input_node : kernel_graph->input_nodes()) {
|
||||
auto element_pair = kernel_graph->GetElementInTupleBackendFrontIndexMap(input_node);
|
||||
if (element_pair.first) {
|
||||
PushTupleTensor(args, origin_parameters, element_pair.first, element_pair.second, &input_tensors);
|
||||
} else {
|
||||
const auto &front_node = kernel_graph->GetFrontAnfByBackendAnf(input_node);
|
||||
PushTensor(args, origin_parameters, front_node, &input_tensors);
|
||||
}
|
||||
}
|
||||
(void)input_tensor_lists.emplace_back(input_tensors);
|
||||
}
|
||||
|
||||
// Input tensors of the control node.
|
||||
std::vector<tensor::TensorPtr> input_tensors;
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_info.control_node_parser_);
|
||||
// Get inputs of control node which come from the host actor.
|
||||
const auto &control_node_parameters = graph_compiler_info.control_node_parser_->control_node_parameters();
|
||||
for (const auto ¶meter_with_index : control_node_parameters) {
|
||||
const auto ¶meter = parameter_with_index.first;
|
||||
MS_EXCEPTION_IF_NULL(parameter);
|
||||
const auto &abs = parameter->abstract();
|
||||
MS_EXCEPTION_IF_NULL(abs);
|
||||
if (abs->isa<abstract::AbstractTuple>()) {
|
||||
MS_LOG(DEBUG) << "Fetch input tensor for tuple parameter:" << parameter->DebugString() << " in control flow.";
|
||||
PushTupleTensor(args, origin_parameters, parameter, parameter_with_index.second, &input_tensors);
|
||||
} else {
|
||||
PushTensor(args, origin_parameters, parameter, &input_tensors);
|
||||
}
|
||||
}
|
||||
(void)input_tensor_lists.emplace_back(input_tensors);
|
||||
|
||||
return input_tensor_lists;
|
||||
}
|
||||
|
||||
MindRTBackendBase::MindRTBackendBase(const std::string &backend_name, const std::string &device_name,
|
||||
uint32_t device_id)
|
||||
: Backend(backend_name), device_name_(device_name) {
|
||||
root_graph_ = nullptr;
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
|
||||
auto &cut_list = pynative_mode ? GetControlOps() : GetMsNonlinearOps();
|
||||
|
||||
graph_partition_ = std::make_shared<GraphPartition>(cut_list, backend_name);
|
||||
graph_compiler_ = std::make_shared<GraphCompiler>();
|
||||
|
||||
const auto &device_context =
|
||||
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name, device_id});
|
||||
device_context->Initialize();
|
||||
device_id_ = device_context->device_context_key().device_id_;
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
SetDebuggerInit();
|
||||
#endif
|
||||
runtime::GraphScheduler::GetInstance().Initialize();
|
||||
}
|
||||
|
||||
void MindRTBackendBase::ProcessNotSupportCnode(const FuncGraphPtr &func_graph,
|
||||
const mindspore::device::DeviceType &old_target,
|
||||
const mindspore::device::DeviceType &new_target) const {
|
||||
const auto &all_nodes = TopoSort(func_graph->return_node(), SuccDeeperSimple, AlwaysInclude);
|
||||
for (const auto &node : all_nodes) {
|
||||
if (!node->isa<CNode>()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
if (!common::AnfAlgo::HasNodeAttr(kAttrNotSupportOpForDevice, cnode)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto not_support_device = common::AnfAlgo::GetNodeAttr<std::string>(node, kAttrNotSupportOpForDevice);
|
||||
if (device::GetDeviceTypeByName(not_support_device) != old_target) {
|
||||
continue;
|
||||
}
|
||||
|
||||
common::AnfAlgo::SetNodeAttr(kAttrPrimitiveTarget, MakeValue(device::GetDeviceNameByType(new_target)), node);
|
||||
}
|
||||
}
|
||||
|
||||
const ActorInfo &MindRTBackendBase::CompileGraphs(const FuncGraphPtr &func_graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_LOG(INFO) << "Status record: start compile function graph: " << func_graph->ToString();
|
||||
PROF_START(compile_func_graph);
|
||||
auto root_graph = WrapPrimitives(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(root_graph);
|
||||
root_graph_ = root_graph;
|
||||
// Register a summary callback function, which is called in the final stages of summary.
|
||||
graph_compiler_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback);
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
ms_execution_mode_ = context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE);
|
||||
real_execution_mode_ = ms_execution_mode_;
|
||||
func_graph->set_flag(kFlagPyNativeRunInGraph, real_execution_mode_ == kPynativeMode);
|
||||
|
||||
// Compile root graph.
|
||||
graph_id_to_device_context_.clear();
|
||||
func_graph_to_kernel_graph_ids_.clear();
|
||||
control_nodes_.clear();
|
||||
|
||||
const auto &device_context =
|
||||
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name_, device_id_});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
bool all_support = device_context->PartitionGraph(func_graph);
|
||||
if (all_support) {
|
||||
auto run_mode = device_context->GetRunMode(func_graph);
|
||||
if (run_mode == device::RunMode::kGraphMode && pynative::GraphAdapter::PyNativeEnableTaskSink(func_graph)) {
|
||||
auto graph_id = graph_compiler_->CompileWholeGraphForGraphRunMode(func_graph, device_context);
|
||||
graph_id_to_device_context_[graph_id] = device_context;
|
||||
} else {
|
||||
CompileSubGraph(func_graph, device::RunMode::kKernelMode);
|
||||
}
|
||||
} else {
|
||||
ProcessNotSupportCnode(func_graph, device_context->GetDeviceType(), mindspore::device::DeviceType::kCPU);
|
||||
CompileSubGraph(func_graph);
|
||||
}
|
||||
|
||||
// Construct the graph compiler info.
|
||||
auto graph_compiler_info = ConstructGraphCompilerInfo(root_graph);
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_info);
|
||||
if (real_execution_mode_ == kGraphMode &&
|
||||
((!graph_compiler_info->graphs_.empty()) || graph_compiler_info->control_nodes_.size() > 1)) {
|
||||
// Transform graph to actor DAG, and schedule the actor DAG.
|
||||
ParseControlNodes(*graph_compiler_info);
|
||||
const auto &actor_set = runtime::GraphScheduler::GetInstance().Transform(*graph_compiler_info);
|
||||
runtime::GraphScheduler::GetInstance().Schedule(actor_set);
|
||||
}
|
||||
const ActorInfo &actor_info = graph_compiler_info->name_;
|
||||
(void)actor_to_graph_compiler_info_.emplace(graph_compiler_info->name_, std::move(graph_compiler_info));
|
||||
PROF_END(compile_func_graph);
|
||||
|
||||
if (ms_execution_mode_ != real_execution_mode_) {
|
||||
context_ptr->set_param<int>(MS_CTX_EXECUTION_MODE, ms_execution_mode_);
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Status record: end compile function graph: " << func_graph->ToString()
|
||||
<< ", produce actor: " << actor_info;
|
||||
return actor_info;
|
||||
}
|
||||
|
||||
void MindRTBackendBase::CompileSubGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode) {
|
||||
auto root_graph = WrapPrimitives(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(root_graph);
|
||||
CompileGraph(root_graph, run_mode);
|
||||
|
||||
MS_EXCEPTION_IF_NULL(root_graph->manager());
|
||||
FuncGraphSet sub_graphs = root_graph->manager()->func_graphs();
|
||||
for (const auto &sub_graph : sub_graphs) {
|
||||
if (sub_graph != func_graph && sub_graph != nullptr) {
|
||||
CompileGraph(sub_graph, run_mode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackendBase::CompileGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode) {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(graph_partition_);
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
|
||||
bool contain_multi_target = false;
|
||||
// Split graph to segments.
|
||||
const auto &segments = graph_partition_->Partition(func_graph, &contain_multi_target);
|
||||
MS_LOG(INFO) << "Compile graph: " << func_graph->ToString() << ", Split segments size:" << segments.size();
|
||||
|
||||
// Foreach the segments to compile graph.
|
||||
for (const auto &segment : segments) {
|
||||
CompileGraph(segment, run_mode);
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackendBase::CompileGraph(const GraphSegmentPtr &segment, device::RunMode run_mode) {
|
||||
MS_EXCEPTION_IF_NULL(segment);
|
||||
// Compile the normal nodes, which doesn't contain the cut node.
|
||||
if (segment->nodes_.size() == 0) {
|
||||
MS_LOG(EXCEPTION) << "The segments size is 0.";
|
||||
}
|
||||
if (!segment->is_cut_) {
|
||||
MS_EXCEPTION_IF_NULL(segment->nodes_[0]);
|
||||
MS_LOG(INFO) << "Compile normal segment, the first node: " << segment->nodes_[0]->DebugString();
|
||||
|
||||
// Get the device context.
|
||||
const auto &cur_device_name = GetCNodeTarget(segment->nodes_[0]);
|
||||
const auto &device_context =
|
||||
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({cur_device_name, device_id_});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
device_context->Initialize();
|
||||
|
||||
// Transform nodes to inputs and outputs.
|
||||
FuncGraphPtr fg;
|
||||
AnfNodePtrList inputs;
|
||||
AnfNodePtrList outputs;
|
||||
std::tie(fg, inputs, outputs) = TransformSegmentToAnfGraph(segment->nodes_);
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
// Compile graph.
|
||||
auto graph_id =
|
||||
graph_compiler_->CompileGraph(segment, outputs, device_context, run_mode, real_execution_mode_ == kPynativeMode);
|
||||
|
||||
graph_id_to_device_context_[graph_id] = device_context;
|
||||
|
||||
const auto &func_graph = segment->nodes_[0]->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
if (func_graph_to_kernel_graph_ids_.find(func_graph) == func_graph_to_kernel_graph_ids_.end()) {
|
||||
(void)func_graph_to_kernel_graph_ids_[func_graph].emplace_back(std::vector<GraphId>{graph_id});
|
||||
} else {
|
||||
(void)func_graph_to_kernel_graph_ids_[func_graph].back().emplace_back(graph_id);
|
||||
}
|
||||
} else {
|
||||
// Compile the cut node.
|
||||
auto cut_node = segment->nodes_[0];
|
||||
MS_EXCEPTION_IF_NULL(cut_node);
|
||||
MS_LOG(INFO) << "Compile cut segment, the cut node: " << cut_node->DebugString();
|
||||
control_nodes_.push_back(cut_node);
|
||||
if (common::AnfAlgo::IsCallNode(cut_node) || common::AnfAlgo::CheckPrimitiveType(cut_node, prim::kPrimSwitch) ||
|
||||
common::AnfAlgo::CheckPrimitiveType(cut_node, prim::kPrimSwitchLayer)) {
|
||||
const auto &func_graph = cut_node->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
(void)func_graph_to_kernel_graph_ids_[func_graph].emplace_back(std::vector<GraphId>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void TensorValueToVector(const ValuePtr &value, VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
auto value_tuple = value->cast<ValueTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(value_tuple);
|
||||
for (size_t i = 0; i < value_tuple->size(); ++i) {
|
||||
ValuePtr element = value_tuple->value()[i];
|
||||
MS_EXCEPTION_IF_NULL(element);
|
||||
if (element->isa<tensor::Tensor>()) {
|
||||
auto tensor = element->cast<tensor::TensorPtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
outputs->emplace_back(tensor);
|
||||
} else if (element->isa<ValueTuple>()) {
|
||||
VectorRef tuple;
|
||||
TensorValueToVector(element, &tuple);
|
||||
outputs->emplace_back(tuple);
|
||||
}
|
||||
}
|
||||
} else if (value->isa<tensor::Tensor>()) {
|
||||
auto tensor = value->cast<tensor::TensorPtr>();
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
outputs->emplace_back(tensor);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &graph_output, const VectorRef &args, VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(graph_output);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
if (graph_output->isa<ValueNode>()) {
|
||||
MS_LOG(INFO) << "Graph's output is a constant. No need to execute.";
|
||||
VectorRef output_tmp;
|
||||
ValuePtr value = GetValueNode(graph_output);
|
||||
TensorValueToVector(value, &output_tmp);
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
outputs->emplace_back(output_tmp);
|
||||
} else if (value->isa<tensor::Tensor>()) {
|
||||
*outputs = output_tmp;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Graph output is empty!";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (graph_output->isa<Parameter>()) {
|
||||
MS_LOG(INFO) << "Graph's output is a parameter. If all params are inputs, no need to execute.";
|
||||
// Find the right parameter as ret_val.
|
||||
auto func_graph = graph_output->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
auto params = func_graph->parameters();
|
||||
if (args.size() != params.size()) {
|
||||
MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to graph input size " << params.size();
|
||||
}
|
||||
|
||||
auto it = std::find(params.begin(), params.end(), graph_output);
|
||||
if (it == params.end()) {
|
||||
MS_EXCEPTION(UnknownError) << "When graph output is Parameter, it should be found in graph parameters";
|
||||
}
|
||||
size_t index = it - params.cbegin();
|
||||
if (index >= args.size()) {
|
||||
MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than args size " << args.size();
|
||||
}
|
||||
|
||||
outputs->emplace_back(args[index]);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void MindRTBackendBase::ConstructOutputs(runtime::ActorSet *actor_set, VectorRef *outputs,
|
||||
const FuncGraphPtr &root_graph) {
|
||||
bool need_contruct_output = !(distributed::recovery::RecoveryContext::GetInstance()->enable_recovery() &&
|
||||
distributed::recovery::RecoveryContext::GetInstance()->need_reset());
|
||||
bool is_embedding_cache_server = false;
|
||||
#ifdef WITH_BACKEND
|
||||
is_embedding_cache_server = ps::PSContext::instance()->cache_enable() && ps::PSContext::instance()->is_server();
|
||||
#endif
|
||||
if (need_contruct_output) {
|
||||
// Update device address for output node of graph.
|
||||
// Summary processing will use the output device address, so must be after the summary processing.
|
||||
if (!is_embedding_cache_server) {
|
||||
actor_set->output_actor_->UpdateOutputDeviceAddress();
|
||||
}
|
||||
|
||||
// Fetch outputs.
|
||||
MS_EXCEPTION_IF_NULL(actor_set->output_actor_);
|
||||
auto &output_tensors = actor_set->output_actor_->outputs();
|
||||
if (!output_tensors.empty()) {
|
||||
size_t output_position = 0;
|
||||
ConstructOutputs(root_graph->output(), output_tensors, &output_position, outputs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MindRTBackendBase::RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(root_graph_);
|
||||
if (IsGraphOutputValueNodeOrParameter(root_graph_->output(), args, outputs)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto &context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (context_ptr->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
|
||||
MS_LOG(INFO) << "PrecompileOnly, stop run graph";
|
||||
return;
|
||||
}
|
||||
|
||||
// Open abstract_lock for dynamic_shape
|
||||
AnfUtils::OpenAbstractLock();
|
||||
|
||||
MS_LOG(INFO) << "Status record: start run actor: " << actor_info;
|
||||
// Fetch the graph compiler info.
|
||||
const auto &graph_iter = actor_to_graph_compiler_info_.find(actor_info);
|
||||
if (graph_iter == actor_to_graph_compiler_info_.end()) {
|
||||
MS_LOG(EXCEPTION) << "Can't find the graph compiler info.";
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(graph_iter->second);
|
||||
const auto &graph_compiler_info = *(graph_iter->second);
|
||||
// For pynative and graph mix execution.
|
||||
WaitTaskFinish();
|
||||
|
||||
// Run in the pynative mode.
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
// There will be more than one kernel graph in heterogeneous scenario in a ms function of PyNative Mode.
|
||||
if (real_execution_mode_ == kPynativeMode) {
|
||||
RunGraphByCondition(actor_info, graph_compiler_info, args, outputs);
|
||||
return;
|
||||
}
|
||||
|
||||
auto input_tensors = GetRunGraphInputs(graph_compiler_info, args);
|
||||
// Release python gil.
|
||||
mindspore::ScopedLongRunning long_running;
|
||||
// Run actor DAG.
|
||||
const auto &actor_set = runtime::GraphScheduler::GetInstance().Fetch(actor_info);
|
||||
MS_EXCEPTION_IF_NULL(actor_set);
|
||||
runtime::GraphScheduler::GetInstance().Run(actor_set, input_tensors);
|
||||
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
graph_compiler_->Summary(graph_compiler_info.graphs_);
|
||||
|
||||
ConstructOutputs(actor_set, outputs, root_graph_);
|
||||
|
||||
runtime::GraphScheduler::GetInstance().ClearActorData(actor_set);
|
||||
// Close abstract_lock for dynamic_shape
|
||||
AnfUtils::CloseAbstractLock();
|
||||
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
|
||||
}
|
||||
|
||||
BaseRef MindRTBackendBase::ConstructOutputByAbstract(const abstract::AbstractBasePtr &abstract,
|
||||
const std::vector<tensor::TensorPtr> &output_tensors,
|
||||
size_t *output_position) {
|
||||
MS_EXCEPTION_IF_NULL(abstract);
|
||||
MS_EXCEPTION_IF_NULL(output_position);
|
||||
|
||||
size_t outputs_num = common::AnfAlgo::GetOutputNumByAbstract(abstract);
|
||||
if (*output_position + outputs_num > output_tensors.size()) {
|
||||
MS_LOG(EXCEPTION) << "The output position is out of range: " << *output_position << " need:" << outputs_num
|
||||
<< " total:" << output_tensors.size();
|
||||
}
|
||||
VectorRef outputs;
|
||||
|
||||
if (!abstract->isa<abstract::AbstractTuple>()) {
|
||||
(*output_position)++;
|
||||
return output_tensors[(*output_position) - 1];
|
||||
}
|
||||
|
||||
auto tuple_abstract = abstract->cast<abstract::AbstractTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tuple_abstract);
|
||||
const auto &sub_abstracts = tuple_abstract->elements();
|
||||
for (const auto &sub_abstract : sub_abstracts) {
|
||||
MS_EXCEPTION_IF_NULL(sub_abstract);
|
||||
outputs.emplace_back(ConstructOutputByAbstract(sub_abstract, output_tensors, output_position));
|
||||
}
|
||||
return outputs;
|
||||
}
|
||||
|
||||
void MindRTBackendBase::ConstructOutputs(const AnfNodePtr &output_node,
|
||||
const std::vector<tensor::TensorPtr> &output_tensors, size_t *output_position,
|
||||
VectorRef *outputs) {
|
||||
MS_EXCEPTION_IF_NULL(output_node);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
MS_EXCEPTION_IF_NULL(output_position);
|
||||
const PrimitiveSet expand_prims{
|
||||
prim::kPrimMakeTuple,
|
||||
prim::kPrimMakeCSRTensor,
|
||||
prim::kPrimMakeCOOTensor,
|
||||
prim::kPrimMakeRowTensor,
|
||||
};
|
||||
// The MakeTuple/MakeSaprse node need expand and recurse.
|
||||
if (IsOneOfPrimitiveCNode(output_node, expand_prims)) {
|
||||
auto make_tuple = output_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(make_tuple);
|
||||
VectorRef make_tuple_output;
|
||||
for (size_t i = 1; i < make_tuple->inputs().size(); i++) {
|
||||
ConstructOutputs(make_tuple->input(i), output_tensors, output_position, &make_tuple_output);
|
||||
}
|
||||
outputs->emplace_back(std::move(make_tuple_output));
|
||||
return;
|
||||
}
|
||||
|
||||
// The depend node need get the real node.
|
||||
if (common::AnfAlgo::CheckPrimitiveType(output_node, prim::kPrimDepend)) {
|
||||
auto depend_node = output_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(depend_node);
|
||||
ConstructOutputs(depend_node->input(kRealInputIndexInDepend), output_tensors, output_position, outputs);
|
||||
return;
|
||||
}
|
||||
|
||||
auto outputs_num = common::AnfAlgo::GetOutputTensorNum(output_node);
|
||||
// The value node uses the value to be output, to avoid the host memory of value free due to value node destruction.
|
||||
if (output_node->isa<ValueNode>()) {
|
||||
auto value = output_node->cast<ValueNodePtr>()->value();
|
||||
MS_EXCEPTION_IF_NULL(value);
|
||||
if (value->isa<ValueTuple>()) {
|
||||
outputs->emplace_back(value);
|
||||
(*output_position) += CountValueNum(value->cast<ValueTuplePtr>());
|
||||
} else if (outputs_num != 0) {
|
||||
outputs->emplace_back(value);
|
||||
(*output_position) += outputs_num;
|
||||
}
|
||||
// The empty value node return the empty VectorRef.
|
||||
return;
|
||||
}
|
||||
|
||||
if (common::AnfAlgo::IsCallNode(output_node)) {
|
||||
auto abstract = output_node->abstract();
|
||||
MS_EXCEPTION_IF_NULL(abstract);
|
||||
outputs->emplace_back(ConstructOutputByAbstract(abstract, output_tensors, output_position));
|
||||
return;
|
||||
}
|
||||
|
||||
auto &output_abstract = output_node->abstract();
|
||||
MS_EXCEPTION_IF_NULL(output_abstract);
|
||||
// Wrap output to VectorRef if the output is tuple.
|
||||
if (output_abstract->isa<abstract::AbstractTuple>()) {
|
||||
VectorRef output_tuple;
|
||||
for (size_t i = 0; i < outputs_num; ++i) {
|
||||
if (*output_position >= output_tensors.size()) {
|
||||
MS_LOG(EXCEPTION) << "The output position is out of range: " << *output_position;
|
||||
}
|
||||
output_tuple.emplace_back(std::move(output_tensors[*output_position]));
|
||||
++(*output_position);
|
||||
}
|
||||
outputs->emplace_back(std::move(output_tuple));
|
||||
} else {
|
||||
for (size_t i = 0; i < outputs_num; ++i) {
|
||||
if (*output_position >= output_tensors.size()) {
|
||||
MS_LOG(EXCEPTION) << "The output position is out of range: " << *output_position;
|
||||
}
|
||||
outputs->emplace_back(std::move(output_tensors[*output_position]));
|
||||
++(*output_position);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
void MindRTBackendBase::SetDebuggerInit() {
|
||||
auto debugger_ = Debugger::GetInstance();
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
debugger_->Init(device_id_, ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET));
|
||||
}
|
||||
#endif
|
||||
|
||||
std::shared_ptr<GraphCompilerInfo> MindRTBackendBase::ConstructGraphCompilerInfo(const FuncGraphPtr &root_graph) {
|
||||
MS_EXCEPTION_IF_NULL(root_graph);
|
||||
MS_EXCEPTION_IF_NULL(graph_compiler_);
|
||||
|
||||
std::vector<KernelGraphPtr> graphs;
|
||||
std::vector<DeviceContext *> device_contexts;
|
||||
std::string name = "kernel_graph";
|
||||
size_t graph_index = 0;
|
||||
for (const auto &graph_id_to_context : graph_id_to_device_context_) {
|
||||
(void)graphs.emplace_back(graph_compiler_->Fetch(graph_id_to_context.first));
|
||||
(void)device_contexts.emplace_back(graph_id_to_context.second);
|
||||
if (graph_index == 0) {
|
||||
(void)name.append("_").append(std::to_string(graph_id_to_context.first));
|
||||
} else if (graph_index == graph_id_to_device_context_.size() - 1) {
|
||||
(void)name.append("-").append(std::to_string(graph_id_to_context.first));
|
||||
}
|
||||
++graph_index;
|
||||
}
|
||||
|
||||
auto parser = std::make_shared<ControlNodeParser>();
|
||||
|
||||
runtime::KernelMapPosition outputs_order;
|
||||
const auto &root_output =
|
||||
common::AnfAlgo::VisitKernelWithReturnType(root_graph->output(), 0, false, {prim::kPrimTupleGetItem}).first;
|
||||
size_t position = 0;
|
||||
auto outputs = common::AnfAlgo::GetAllOutputWithIndex(root_output);
|
||||
size_t outputs_num = outputs.size();
|
||||
for (const auto &output : outputs) {
|
||||
if (outputs_order.count(output) == 0) {
|
||||
outputs_order[output] = {position++};
|
||||
} else {
|
||||
(void)outputs_order[output].emplace_back(position++);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<int64_t> *> tensors_mask;
|
||||
std::vector<std::vector<tensor::TensorPtr> *> input_tensors;
|
||||
auto strategy = runtime::GraphExecutionStrategy::kPipeline;
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (context_ptr->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
|
||||
strategy = runtime::GraphExecutionStrategy::kPipelineWithExecutionOrder;
|
||||
}
|
||||
return std::make_shared<GraphCompilerInfo>(graphs, device_contexts, tensors_mask, input_tensors, control_nodes_,
|
||||
root_graph->parameters(), parser, outputs_order, outputs_num, name, false,
|
||||
strategy);
|
||||
}
|
||||
|
||||
void MindRTBackendBase::ParseControlNodes(const GraphCompilerInfo &graph_compile_info) {
|
||||
FuncGraphToKernelGraphGroup func_graph_to_kernel_graphs;
|
||||
for (const auto &func_graph_to_kernel_graph_ids : func_graph_to_kernel_graph_ids_) {
|
||||
const auto &func_graph = func_graph_to_kernel_graph_ids.first;
|
||||
for (const auto &sub_kernel_graphs_ids : func_graph_to_kernel_graph_ids.second) {
|
||||
std::vector<KernelGraphPtr> kernel_graphs;
|
||||
for (const auto &graph_id : sub_kernel_graphs_ids) {
|
||||
const auto &kernel_graph = graph_compiler_->Fetch(graph_id);
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
(void)kernel_graphs.emplace_back(kernel_graph);
|
||||
}
|
||||
(void)func_graph_to_kernel_graphs[func_graph].emplace_back(kernel_graphs);
|
||||
}
|
||||
}
|
||||
|
||||
graph_compile_info.control_node_parser_->Parse(control_nodes_, graph_compile_info.graphs_,
|
||||
graph_compile_info.device_contexts_, root_graph_,
|
||||
func_graph_to_kernel_graphs);
|
||||
}
|
||||
} // namespace compile
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,146 @@
|
|||
/**
|
||||
* Copyright 2019-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_VM_BACKENDBASE_H_
|
||||
#define MINDSPORE_CCSRC_VM_BACKENDBASE_H_
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "utils/hash_map.h"
|
||||
#include "ir/anf.h"
|
||||
#include "backend/common/session/session_basic.h"
|
||||
#include "runtime/hardware/device_context.h"
|
||||
#include "backend/graph_compiler/segment_runner.h"
|
||||
#include "runtime/graph_scheduler/actor/actor_set.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace compile {
|
||||
using GraphOutputInfo = session::GraphOutputInfo;
|
||||
using DeviceContext = device::DeviceContext;
|
||||
using ActorInfo = runtime::ActorInfo;
|
||||
using GraphCompiler = runtime::GraphCompiler;
|
||||
using GraphCompilerInfo = runtime::GraphCompilerInfo;
|
||||
using ControlNodeParser = runtime::ControlNodeParser;
|
||||
using FuncGraphToKernelGraphGroup = runtime::FuncGraphToKernelGraphGroup;
|
||||
using ControlNodeParserPtr = runtime::ControlNodeParserPtr;
|
||||
using KernelWithIndex = session::KernelWithIndex;
|
||||
|
||||
enum SwitchCondStatus {
|
||||
kCondOk = 0,
|
||||
kCondAlreadyRun,
|
||||
};
|
||||
|
||||
class BACKEND_EXPORT Backend {
|
||||
public:
|
||||
explicit Backend(const std::string &name);
|
||||
|
||||
virtual ~Backend() = default;
|
||||
|
||||
LinkFuncType convert_fn() { return convert_fn_; }
|
||||
std::string name() { return name_; }
|
||||
virtual bool GetCond(const BaseRef &c, bool *value);
|
||||
virtual bool GetIndex(const BaseRef &c, int64_t *value);
|
||||
virtual GraphId CompileGraph(NotNull<FuncGraphPtr> fg) { return kInvalidGraphId; }
|
||||
virtual void SetDebugger() {}
|
||||
|
||||
bool is_multi_graph_sink() const { return is_multi_graph_sink_; }
|
||||
void set_is_multi_graph_sink(bool flag) { is_multi_graph_sink_ = flag; }
|
||||
|
||||
protected:
|
||||
std::string name_;
|
||||
LinkFuncType convert_fn_;
|
||||
bool is_multi_graph_sink_;
|
||||
};
|
||||
|
||||
void PushInputTensor(const BaseRef &arg, std::vector<tensor::TensorPtr> *inputs);
|
||||
std::vector<std::vector<tensor::TensorPtr>> GetRunGraphInputs(const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args);
|
||||
|
||||
class BACKEND_EXPORT MindRTBackendBase : public Backend {
|
||||
public:
|
||||
MindRTBackendBase(const std::string &backend_name, const std::string &device_name, uint32_t device_id);
|
||||
~MindRTBackendBase() override = default;
|
||||
|
||||
// The parameter root_graph is a root graph, and the root graph maybe contain multiple sub graphs, It will traverse
|
||||
// all sub graphs to call CompileGraph.
|
||||
const ActorInfo &CompileGraphs(const FuncGraphPtr &func_graph);
|
||||
|
||||
// Run Graph in the graph mode.
|
||||
void RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs);
|
||||
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
void SetDebuggerInit();
|
||||
#endif
|
||||
|
||||
// Get the device target.
|
||||
std::string GetDeviceTarget() { return device_name_; }
|
||||
|
||||
virtual void WaitTaskFinish() const {}
|
||||
virtual void RunGraphByCondition(const ActorInfo &actor_info, const GraphCompilerInfo &graph_compiler_info,
|
||||
const VectorRef &args, VectorRef *outputs) {}
|
||||
|
||||
protected:
|
||||
// The parameter func_graph is a graph, it can be either a root graph or a sub graph,
|
||||
// The result of graph compiler is stored in graph_id_to_device_context_ and control_nodes_.
|
||||
void CompileGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode);
|
||||
|
||||
// Compile the kernel graph by the segment which is from the function graph partition.
|
||||
void CompileGraph(const GraphSegmentPtr &segment, device::RunMode run_mode);
|
||||
|
||||
void ConstructOutputs(runtime::ActorSet *actor_set, VectorRef *outputs, const FuncGraphPtr &root_graph);
|
||||
|
||||
// Restore the outputs tuple by the origin funcGraph output node and output tensors.
|
||||
void ConstructOutputs(const AnfNodePtr &output_node, const std::vector<tensor::TensorPtr> &output_tensors,
|
||||
size_t *output_position, VectorRef *outputs);
|
||||
// In the control flow, the output of the call node needs to be created by abstract.
|
||||
BaseRef ConstructOutputByAbstract(const abstract::AbstractBasePtr &abstract,
|
||||
const std::vector<tensor::TensorPtr> &output_tensors, size_t *output_position);
|
||||
// Construct the GraphCompilerInfo by the compilation results of graph, used in Graph mode.
|
||||
std::shared_ptr<GraphCompilerInfo> ConstructGraphCompilerInfo(const FuncGraphPtr &root_graph);
|
||||
|
||||
void ParseControlNodes(const GraphCompilerInfo &graph_compile_info);
|
||||
|
||||
// When compiling FuncGraph, it is divided according to the control nodes, and obtain the control nodes and several
|
||||
// node segments. Node segments will be compiled into kernelGraphs which are expressed as GraphId and bound to
|
||||
// the corresponding device_context.
|
||||
std::map<GraphId, DeviceContext *> graph_id_to_device_context_;
|
||||
// Funcgraph will be cut into multiple kernel graphs, and the map is used to save the correspondence.
|
||||
// The kernel graphs which not cut by control flow are placed in the same group.
|
||||
std::map<FuncGraphPtr, std::vector<std::vector<GraphId>>> func_graph_to_kernel_graph_ids_;
|
||||
std::map<GraphInfo, DeviceContext *> graph_info_to_device_context_;
|
||||
std::vector<AnfNodePtr> control_nodes_;
|
||||
|
||||
mindspore::HashMap<ActorInfo, std::shared_ptr<GraphCompilerInfo>> actor_to_graph_compiler_info_;
|
||||
|
||||
FuncGraphPtr root_graph_;
|
||||
GraphPartitionPtr graph_partition_;
|
||||
std::shared_ptr<GraphCompiler> graph_compiler_;
|
||||
std::string device_name_;
|
||||
uint32_t device_id_;
|
||||
int ms_execution_mode_{kGraphMode};
|
||||
int real_execution_mode_{kGraphMode};
|
||||
void CompileSubGraph(const FuncGraphPtr &func_graph, device::RunMode run_mode = device::RunMode::kUnknown);
|
||||
void ProcessNotSupportCnode(const FuncGraphPtr &func_graph, const device::DeviceType &old_target,
|
||||
const device::DeviceType &new_target) const;
|
||||
};
|
||||
} // namespace compile
|
||||
} // namespace mindspore
|
||||
#endif
|
Loading…
Reference in New Issue