[bugfix] Heterogeneous scenario in a ms function of PyNative Mode causes a core dump

This commit is contained in:
lizhenyu 2021-07-06 10:06:51 +08:00
parent 83d6ab79e1
commit dadfe54ced
7 changed files with 45 additions and 10 deletions

View File

@ -1192,6 +1192,7 @@ void SessionBasic::GetParameterIndex(const KernelGraph *graph, const std::vector
<< ", input size: " << inputs.size(); << ", input size: " << inputs.size();
} }
const auto &input = inputs[index]; const auto &input = inputs[index];
MS_EXCEPTION_IF_NULL(input);
// Check shape of input and parameter // Check shape of input and parameter
const auto &input_shape = input->shape(); const auto &input_shape = input->shape();
const auto &param_shape = AnfAlgo::GetOutputInferShape(param, 0); const auto &param_shape = AnfAlgo::GetOutputInferShape(param, 0);

View File

@ -168,6 +168,10 @@ void CreateKernelOutputDeviceAddress(const DeviceContext *device_context, const
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
const std::vector<CNodePtr> &kernels = graph->execution_order(); const std::vector<CNodePtr> &kernels = graph->execution_order();
for (const auto &kernel : kernels) { for (const auto &kernel : kernels) {
MS_EXCEPTION_IF_NULL(kernel);
if (AnfAlgo::IsControlOpExecInBackend(kernel)) {
continue;
}
auto kernel_mod = AnfAlgo::GetKernelMod(kernel); auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod); MS_EXCEPTION_IF_NULL(kernel_mod);
auto output_sizes = kernel_mod->GetOutputSizeList(); auto output_sizes = kernel_mod->GetOutputSizeList();
@ -190,6 +194,10 @@ void CreateKernelWorkspaceDeviceAddress(const DeviceContext *device_context, con
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
const std::vector<CNodePtr> &kernels = graph->execution_order(); const std::vector<CNodePtr> &kernels = graph->execution_order();
for (const auto &kernel : kernels) { for (const auto &kernel : kernels) {
MS_EXCEPTION_IF_NULL(kernel);
if (AnfAlgo::IsControlOpExecInBackend(kernel)) {
continue;
}
auto kernel_mod = AnfAlgo::GetKernelMod(kernel); auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod); MS_EXCEPTION_IF_NULL(kernel_mod);
auto workspace_sizes = kernel_mod->GetWorkspaceSizeList(); auto workspace_sizes = kernel_mod->GetWorkspaceSizeList();

View File

@ -494,6 +494,11 @@ void GraphScheduler::Initialize() {
} }
ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info) { ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info) {
// Local maps and vectors clear.
graph_output_to_actor_.clear();
front_node_to_actor_.clear();
copy_actors_.clear();
MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor begin."; MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor begin.";
if (graph_compiler_info.graphs_.size() == 0) { if (graph_compiler_info.graphs_.size() == 0) {
MS_LOG(EXCEPTION) << "The number of graphs is zero."; MS_LOG(EXCEPTION) << "The number of graphs is zero.";

View File

@ -349,6 +349,11 @@ const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
// Register a summary callback function, which is called in the final stages of summary. // Register a summary callback function, which is called in the final stages of summary.
graph_compiler_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback); graph_compiler_->RegisterSummaryCallBackFunc(callbacks::SummarySaveCallback);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
ms_execution_mode_ = context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE);
real_execution_mode_ = ms_execution_mode_;
// Compile root graph. // Compile root graph.
graph_id_to_device_context_.clear(); graph_id_to_device_context_.clear();
control_nodes_.clear(); control_nodes_.clear();
@ -365,10 +370,7 @@ const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
// Construct the graph compiler info. // Construct the graph compiler info.
auto graph_compiler_info = ConstructGraphCompilerInfo(root_graph_); auto graph_compiler_info = ConstructGraphCompilerInfo(root_graph_);
auto context_ptr = MsContext::GetInstance(); if (real_execution_mode_ == kGraphMode) {
MS_EXCEPTION_IF_NULL(context_ptr);
const bool graph_mode = context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode;
if (graph_mode) {
// Transform graph to actor DAG, and schedule the actor DAG. // Transform graph to actor DAG, and schedule the actor DAG.
const auto &actor_set = runtime::GraphScheduler::GetInstance().Transform(*graph_compiler_info); const auto &actor_set = runtime::GraphScheduler::GetInstance().Transform(*graph_compiler_info);
runtime::GraphScheduler::GetInstance().Schedule(actor_set); runtime::GraphScheduler::GetInstance().Schedule(actor_set);
@ -383,9 +385,12 @@ void MindRTBackend::CompileGraph(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(graph_partition_); MS_EXCEPTION_IF_NULL(graph_partition_);
MS_EXCEPTION_IF_NULL(graph_compiler_); MS_EXCEPTION_IF_NULL(graph_compiler_);
bool contain_multi_target;
// Split graph to segments. // Split graph to segments.
const auto &segments = graph_partition_->Partition(func_graph); const auto &segments = graph_partition_->Partition(func_graph, &contain_multi_target);
MS_LOG(INFO) << "Compile graph: " << func_graph->ToString() << ", Split segments size:" << segments.size(); MS_LOG(INFO) << "Compile graph: " << func_graph->ToString() << ", Split segments size:" << segments.size();
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
// Foreach the segments to compile graph. // Foreach the segments to compile graph.
for (const auto &segment : segments) { for (const auto &segment : segments) {
@ -409,8 +414,19 @@ void MindRTBackend::CompileGraph(const FuncGraphPtr &func_graph) {
AnfNodePtrList outputs; AnfNodePtrList outputs;
std::tie(fg, inputs, outputs) = TransformSegmentToAnfGraph(segment->nodes_); std::tie(fg, inputs, outputs) = TransformSegmentToAnfGraph(segment->nodes_);
// There will be more than one kernel graph in heterogeneous scenario in a ms function of PyNative Mode.
if (contain_multi_target && ms_execution_mode_ == kPynativeMode) {
real_execution_mode_ = kGraphMode;
context_ptr->set_param<int>(MS_CTX_EXECUTION_MODE, kGraphMode);
}
// Compile graph. // Compile graph.
auto graph_id = graph_compiler_->CompileGraph(segment->nodes_, outputs, device_context); auto graph_id = graph_compiler_->CompileGraph(segment->nodes_, outputs, device_context);
if (ms_execution_mode_ != real_execution_mode_) {
context_ptr->set_param<int>(MS_CTX_EXECUTION_MODE, ms_execution_mode_);
}
graph_id_to_device_context_[graph_id] = device_context; graph_id_to_device_context_[graph_id] = device_context;
} else { } else {
// Compile the cut node. // Compile the cut node.
@ -726,9 +742,8 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
// Run in the pynative mode. // Run in the pynative mode.
MS_EXCEPTION_IF_NULL(outputs); MS_EXCEPTION_IF_NULL(outputs);
auto ms_context = MsContext::GetInstance(); // There will be more than one kernel graph in heterogeneous scenario in a ms function of PyNative Mode.
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode); if (real_execution_mode_ == kPynativeMode) {
if (pynative_mode) {
RunGraphBySingleOp(graph_compiler_info.graphs_, input_tensors, outputs); RunGraphBySingleOp(graph_compiler_info.graphs_, input_tensors, outputs);
return; return;
} }

View File

@ -163,6 +163,8 @@ class MindRTBackend : public Backend {
std::shared_ptr<GraphCompiler> graph_compiler_; std::shared_ptr<GraphCompiler> graph_compiler_;
std::string device_name_; std::string device_name_;
uint32_t device_id_; uint32_t device_id_;
int ms_execution_mode_{kGraphMode};
int real_execution_mode_{kGraphMode};
}; };
} // namespace compile } // namespace compile
} // namespace mindspore } // namespace mindspore

View File

@ -588,11 +588,15 @@ bool GraphPartition::IsCut(const AnfNodePtr &node) {
return false; return false;
} }
std::vector<GraphSegmentPtr> GraphPartition::Partition(const FuncGraphPtr &graph) { std::vector<GraphSegmentPtr> GraphPartition::Partition(const FuncGraphPtr &graph, bool *multi_target) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
auto nodes = TopoSort(graph->get_return()); auto nodes = TopoSort(graph->get_return());
MS_LOG(DEBUG) << "Split all nodes size:" << nodes.size(); MS_LOG(DEBUG) << "Split all nodes size:" << nodes.size();
bool contain_multi_target = ContainMultiTarget(nodes); bool contain_multi_target = ContainMultiTarget(nodes);
if (multi_target != nullptr) {
*multi_target = contain_multi_target;
}
auto context_ptr = MsContext::GetInstance(); auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr); MS_EXCEPTION_IF_NULL(context_ptr);
std::string default_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET); std::string default_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);

View File

@ -34,7 +34,7 @@ class GraphPartition {
public: public:
explicit GraphPartition(const std::vector<PrimitivePtr> &cut_list, const std::string &backend_name); explicit GraphPartition(const std::vector<PrimitivePtr> &cut_list, const std::string &backend_name);
~GraphPartition() = default; ~GraphPartition() = default;
std::vector<GraphSegmentPtr> Partition(const FuncGraphPtr &func_graph); std::vector<GraphSegmentPtr> Partition(const FuncGraphPtr &func_graph, bool *multi_target = nullptr);
private: private:
bool IsCut(const AnfNodePtr &node); bool IsCut(const AnfNodePtr &node);