unified runtime optimize actor dump

This commit is contained in:
limingqi107 2021-10-11 10:22:56 +08:00
parent 8b42909722
commit a1ef8140a2
14 changed files with 410 additions and 304 deletions

View File

@ -96,13 +96,13 @@ void AbstractActor::SendOutput(OpContext<DeviceTensor> *const context) {
MS_EXCEPTION_IF_NULL(context);
// Must be the execution order: send result --> send data --> send control, avoid the illegal timing problem.
// 1.Send graph output result.
if (output_result_arrows_.size() != output_nodes_.size()) {
if (output_result_arrows_.size() != output_result_nodes_.size()) {
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The size of output result arrows is not equal to the output nodes.");
}
size_t output_node_index = 0;
for (const auto &result_arrow : output_result_arrows_) {
MS_EXCEPTION_IF_NULL(result_arrow);
Async(result_arrow->to_op_id_, &OutputActor::CollectOutput, output_nodes_[output_node_index++],
Async(result_arrow->to_op_id_, &OutputActor::CollectOutput, output_result_nodes_[output_node_index++],
result_arrow->from_output_index_, result_arrow->to_input_index_, context);
}

View File

@ -53,6 +53,17 @@ class AbstractActor : public OpActor<DeviceTensor> {
// Get the position of node in the actor.
virtual size_t FetchNodePosition(const AnfNodePtr &node) const { return 0; }
// Get the member.
KernelTransformType type() const { return type_; }
const std::vector<const DeviceContext *> &device_contexts() const { return device_contexts_; }
const std::vector<AnfNodePtr> &output_result_nodes() const { return output_result_nodes_; }
const std::vector<DataArrowPtr> &output_result_arrows() const { return output_result_arrows_; }
const std::vector<std::pair<size_t, AnfNodePtr>> &device_tensor_store_keys() const {
return device_tensor_store_keys_;
}
const std::vector<AID> &input_data_arrow_aids() const { return input_data_arrow_aids_; }
const std::vector<AID> &input_control_arrow_aids() const { return input_control_arrow_aids_; }
protected:
friend class GraphScheduler;
@ -84,7 +95,7 @@ class AbstractActor : public OpActor<DeviceTensor> {
std::vector<OpDataUniquePtr<DeviceTensor>> output_data_;
// The output nodes and output result arrows of graph output.
std::vector<AnfNodePtr> output_nodes_;
std::vector<AnfNodePtr> output_result_nodes_;
std::vector<DataArrowPtr> output_result_arrows_;
// The dependent device tensor stores, the dependent expression is pair<index, AnfNode>.

View File

@ -0,0 +1,303 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/framework/actor/actor_dump.h"
namespace mindspore {
namespace runtime {
namespace {
// Dump the members shared by all actors: device contexts, device tensor store keys,
// input data/control arrow sources, output data/control arrows and graph output result arrows.
// Output format (tabs and trailing spaces) is kept identical to the legacy GraphScheduler dump.
void DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  // Hoist each accessor result into a const reference so the container is fetched only once.
  const auto &device_contexts = actor->device_contexts();
  if (device_contexts.size() > 0) {
    ofs << "\t\tdevice_contexts:" << device_contexts.size() << "\n ";
    for (const auto &device_context : device_contexts) {
      if (device_context == nullptr) {
        // A null context is still recorded so the slot count stays visible in the dump.
        ofs << "\t\t\tdevice_context:" << device_context << "\n";
        continue;
      }
      ofs << "\t\t\tdevice_context:" << device_context->device_context_key().ToString() << "\n";
    }
  }

  const auto &device_tensor_store_keys = actor->device_tensor_store_keys();
  if (device_tensor_store_keys.size() > 0) {
    ofs << "\t\tdevice_tensor_store_keys:" << device_tensor_store_keys.size() << "\n ";
    for (const auto &device_tensor_store_key : device_tensor_store_keys) {
      MS_EXCEPTION_IF_NULL(device_tensor_store_key.second);
      ofs << "\t\t\tto_input_index:" << device_tensor_store_key.first
          << "\tfrom_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n";
    }
  }

  const auto &input_data_arrow_aids = actor->input_data_arrow_aids();
  if (input_data_arrow_aids.size() > 0) {
    ofs << "\t\tinput_data_arrow_actors:" << input_data_arrow_aids.size() << "\n ";
    for (const auto &input_data_arrow_aid : input_data_arrow_aids) {
      ofs << "\t\t\tfrom_actor_name:" << input_data_arrow_aid.Name() << "\n";
    }
  }

  const auto &input_control_arrow_aids = actor->input_control_arrow_aids();
  if (input_control_arrow_aids.size() > 0) {
    ofs << "\t\tinput_control_arrow_actors:" << input_control_arrow_aids.size() << "\n ";
    for (const auto &input_control_arrow_aid : input_control_arrow_aids) {
      ofs << "\t\t\tfrom_actor_name:" << input_control_arrow_aid.Name() << "\n";
    }
  }

  const auto &output_data_arrows = actor->output_data_arrows();
  if (output_data_arrows.size() > 0) {
    ofs << "\t\toutput_data_arrows:" << output_data_arrows.size() << "\n ";
    for (const auto &data_arrow : output_data_arrows) {
      MS_EXCEPTION_IF_NULL(data_arrow);
      ofs << "\t\t\tfrom_output_index:" << data_arrow->from_output_index_
          << "\tto_actor_name:" << data_arrow->to_op_id_.Name() << "\tto_input_index:" << data_arrow->to_input_index_
          << "\n";
    }
  }

  const auto &output_control_arrows = actor->output_control_arrows();
  if (output_control_arrows.size() > 0) {
    ofs << "\t\toutput_control_arrows:" << output_control_arrows.size() << "\n ";
    for (const auto &aid : output_control_arrows) {
      ofs << "\t\t\tto_actor_name:" << aid.Name() << "\n";
    }
  }

  // The result arrows and result nodes are parallel vectors; a mismatch indicates a linking bug.
  const auto &output_result_arrows = actor->output_result_arrows();
  const auto &output_result_nodes = actor->output_result_nodes();
  if (output_result_arrows.size() != output_result_nodes.size()) {
    MS_LOG(EXCEPTION) << "The size of output result arrows is not equal to the output nodes.";
  }
  if (output_result_arrows.size() > 0) {
    ofs << "\t\toutput_result_arrows:" << output_result_arrows.size() << "\n ";
    for (size_t i = 0; i < output_result_arrows.size(); ++i) {
      // Take references to avoid copying the shared pointers on every iteration.
      const auto &result_arrow = output_result_arrows[i];
      const auto &output_node = output_result_nodes[i];
      MS_EXCEPTION_IF_NULL(result_arrow);
      MS_EXCEPTION_IF_NULL(output_node);
      ofs << "\t\t\tfrom_output_node:" << output_node->fullname_with_scope()
          << "\tfrom_output_index:" << result_arrow->from_output_index_
          << "\tto_actor_name:" << result_arrow->to_op_id_.Name()
          << "\toutput_node_position:" << result_arrow->to_input_index_ << "\n";
    }
  }
}
// Dump a data source actor. Depending on the concrete type, dumps either the device queue
// kernel with its output device tensors, or the host queue data nodes, then the common members.
void DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &actor_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << actor_name << "\n";

  if (actor->type() == KernelTransformType::kDeviceDataSourceActor) {
    // Dump the member info of device queue data source actor.
    const auto &device_queue_ds_actor = dynamic_cast<const DeviceQueueDataSourceActor *>(actor);
    MS_EXCEPTION_IF_NULL(device_queue_ds_actor);
    const auto &data_kernel = device_queue_ds_actor->data_kernel();
    MS_EXCEPTION_IF_NULL(data_kernel);
    // Hoist the loop-invariant output number instead of recomputing it in the loop condition.
    const size_t output_num = AnfAlgo::GetOutputTensorNum(data_kernel);
    ofs << "\t\tdata_kernel_name:" << data_kernel->fullname_with_scope()
        << "\tinput_number:" << AnfAlgo::GetInputTensorNum(data_kernel)
        << "\toutput_number:" << output_num << "\n";
    for (size_t i = 0; i < output_num; ++i) {
      const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_kernel, i, false);
      MS_EXCEPTION_IF_NULL(device_tensor);
      ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
          << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
    }
  } else if (actor->type() == KernelTransformType::kHostDataSourceActor) {
    // Dump the member info of host queue data source actor.
    const auto &host_queue_ds_actor = dynamic_cast<const HostQueueDataSourceActor *>(actor);
    MS_EXCEPTION_IF_NULL(host_queue_ds_actor);
    const auto &data_nodes = host_queue_ds_actor->data_nodes();
    ofs << "\t\tdata_nodes:" << data_nodes.size() << "\n";
    for (size_t i = 0; i < data_nodes.size(); ++i) {
      const auto &data_node = data_nodes[i];
      MS_EXCEPTION_IF_NULL(data_node);
      const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_node, 0, false);
      MS_EXCEPTION_IF_NULL(device_tensor);
      ofs << "\t\t\tnode_order_number:" << i << "\tnode_name:" << data_node->fullname_with_scope()
          << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
          << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n";
    }
  }

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a kernel actor: the kernel name, input/output counts, each output device tensor,
// then the common abstract actor members.
void DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";

  const auto &kernel = actor->kernel();
  MS_EXCEPTION_IF_NULL(kernel);
  // Hoist the loop-invariant output number instead of recomputing it every iteration.
  const size_t output_num = AnfAlgo::GetOutputTensorNum(kernel);
  ofs << "\t\tkernel_name:" << kernel->fullname_with_scope() << "\tinputs_num:" << AnfAlgo::GetInputTensorNum(kernel)
      << "\toutputs_num:" << output_num << "\n";
  for (size_t i = 0; i < output_num; ++i) {
    const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(kernel, i, false);
    MS_EXCEPTION_IF_NULL(device_tensor);
    ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
        << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
  }

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a super kernel actor: its kernel graph summary followed by the common actor members.
void DumpSuperKernelActor(const SuperKernelActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";

  const auto &graph = actor->graph();
  MS_EXCEPTION_IF_NULL(graph);
  // Fix the dump label typo: "graphl_name" -> "graph_name".
  ofs << "\t\tgraph_id:" << graph->graph_id() << "\tgraph_name:" << graph->ToString()
      << "\tis_sink:" << graph->is_sink() << "\tinputs_num:" << (graph->input_nodes()).size()
      << "\tkernels_num:" << (graph->execution_order()).size() << "\n";

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a copy actor: its output device tensor (if allocated) and the common actor members.
void DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";

  // The output may not exist yet; only dump it when present.
  const auto &copy_output = actor->output();
  if (copy_output != nullptr) {
    ofs << "\t\toutput_index:" << 0 << "\tptr:" << copy_output->GetPtr() << "\tsize:" << copy_output->GetSize()
        << "\toriginal_ref_count:" << copy_output->original_ref_count() << "\n ";
  }

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a gather actor. Only the actor name is recorded for now.
void DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &gather_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << gather_name << '\n';
}
// Dump a switch actor. Only the actor name is recorded for now.
void DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &switch_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << switch_name << '\n';
}
} // namespace
// Dump the data prepare actor section. A nullptr actor is reported with a count of 0 and skipped.
void DumpDataPrepareActor(const DataPrepareActorPtr &actor, std::ofstream &ofs) {
  ofs << "\n\n[Data prepare actor:" << (actor != nullptr ? 1 : 0) << "]\n";
  if (actor == nullptr) {
    return;
  }
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
  DumpAbstractActor(actor.get(), ofs);

  const auto &continuous_memory_nodes = actor->continuous_memory_nodes();
  ofs << "\t\tcontinuous_memory_nodes:" << continuous_memory_nodes.size() << "\n ";
  // Key is pair<node, device context>, value is pair<is_input_need, is_output_need>.
  for (const auto &[node_with_context, need_flags] : continuous_memory_nodes) {
    MS_EXCEPTION_IF_NULL(node_with_context.first);
    MS_EXCEPTION_IF_NULL(node_with_context.second);
    ofs << "\t\t\tnode_name:" << node_with_context.first->fullname_with_scope()
        << "\tdevice_context:" << node_with_context.second->device_context_key().ToString()
        << "\tis_input_need:" << need_flags.first << "\tis_output_need:" << need_flags.second << "\n";
  }
}
// Dump the loop count actor section. A nullptr actor is reported with a count of 0 and skipped.
void DumpLoopCountActor(const LoopCountActorPtr &actor, std::ofstream &ofs) {
  ofs << "\n\n[Loop count actor:" << (actor != nullptr ? 1 : 0) << "]\n";
  if (actor == nullptr) {
    return;
  }
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count() << "\n";
  DumpAbstractActor(actor.get(), ofs);

  // The loop count actor always sends exactly two control arrows:
  // one to the output actor and one to the data prepare actor.
  constexpr size_t kOutputControlArrowsNum = 2;
  ofs << "\t\toutput_control_arrows:" << kOutputControlArrowsNum << "\n ";
  ofs << "\t\t\tto_actor_name:" << actor->output_aid().Name() << "\n";
  ofs << "\t\t\tto_actor_name:" << actor->data_prepare_aid().Name() << "\n";
}
// Dump the output actor section. A nullptr actor is reported with a count of 0 and skipped.
void DumpOutputActor(const OutputActorPtr &actor, std::ofstream &ofs) {
  ofs << "\n\n[Output actor:" << (actor != nullptr ? 1 : 0) << "]\n";
  if (actor == nullptr) {
    return;
  }
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count()
      << "\toutputs_num:" << actor->outputs_num() << "\n";
  DumpAbstractActor(actor.get(), ofs);

  const auto &result_arrow_aids = actor->input_result_arrow_aids();
  ofs << "\t\tinput_result_arrows:" << result_arrow_aids.size() << "\n ";
  for (const auto &aid : result_arrow_aids) {
    ofs << "\t\t\tfrom_actor_name:" << aid.Name() << "\n";
  }
}
// Dump the data source actors section, one entry per actor.
void DumpDSActors(const std::vector<DataSourceActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Data source actors:" << actors.size() << "]\n";
  for (const auto &ds_actor : actors) {
    DumpDSActor(ds_actor.get(), ofs);
  }
}
// Dump the kernel actors section, one entry per actor.
void DumpKernelActors(const std::vector<KernelActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Kernel actors:" << actors.size() << "]\n";
  for (const auto &k_actor : actors) {
    DumpKernelActor(k_actor.get(), ofs);
  }
}
// Dump the super kernel actors section, one entry per actor.
void DumpSuperKernelActors(const std::vector<SuperKernelActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Super kernel actors:" << actors.size() << "]\n";
  for (const auto &sk_actor : actors) {
    DumpSuperKernelActor(sk_actor.get(), ofs);
  }
}
void DumpNoInputKernelActors(const std::vector<AbstractActorPtr> &actors, std::ofstream &ofs) {
ofs << "\n\n[No input kernel actors:" << actors.size() << "]\n";
for (const auto &actor : actors) {
MS_EXCEPTION_IF_NULL(actor);
if (actor->type() == KernelTransformType::kKernelActor) {
auto kernel_actor = dynamic_cast<const KernelActor *>(actor.get());
MS_EXCEPTION_IF_NULL(kernel_actor);
DumpKernelActor(kernel_actor, ofs);
} else if (actor->type() == KernelTransformType::kSuperKernelActor) {
auto super_kernel_actor = dynamic_cast<const SuperKernelActor *>(actor.get());
MS_EXCEPTION_IF_NULL(super_kernel_actor);
DumpSuperKernelActor(super_kernel_actor, ofs);
}
}
}
// Dump the copy actors section, one entry per actor.
void DumpCopyActors(const std::vector<CopyActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Copy actors:" << actors.size() << "]\n";
  for (const auto &c_actor : actors) {
    DumpCopyActor(c_actor.get(), ofs);
  }
}
// Dump the gather actors section, one entry per actor.
void DumpGatherActors(const std::vector<GatherActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Gather actors:" << actors.size() << "]\n";
  for (const auto &g_actor : actors) {
    DumpGatherActor(g_actor.get(), ofs);
  }
}
// Dump the switch actors section, one entry per actor.
void DumpSwitchActors(const std::vector<SwitchActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Switch actors:" << actors.size() << "]\n";
  for (const auto &s_actor : actors) {
    DumpSwitchActor(s_actor.get(), ofs);
  }
}
} // namespace runtime
} // namespace mindspore

View File

@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_DUMP_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_DUMP_H_
#include <vector>
#include <string>
#include <memory>
#include <utility>
#include <fstream>
#include "runtime/framework/actor/abstract_actor.h"
#include "runtime/framework/actor/data_prepare_actor.h"
#include "runtime/framework/actor/data_source_actor.h"
#include "runtime/framework/actor/loop_count_actor.h"
#include "runtime/framework/actor/kernel_actor.h"
#include "runtime/framework/actor/super_kernel_actor.h"
#include "runtime/framework/actor/output_actor.h"
#include "runtime/framework/actor/copy_actor.h"
#include "runtime/framework/actor/control_flow/switch_actor.h"
#include "runtime/framework/actor/control_flow/gather_actor.h"
namespace mindspore {
namespace runtime {
// Dump the single-instance actors. A nullptr actor is reported with a count of 0 and skipped.
void DumpDataPrepareActor(const DataPrepareActorPtr &actor, std::ofstream &ofs);
void DumpLoopCountActor(const LoopCountActorPtr &actor, std::ofstream &ofs);
void DumpOutputActor(const OutputActorPtr &actor, std::ofstream &ofs);
// Dump every actor in the given collection, one dump section per actor kind.
void DumpDSActors(const std::vector<DataSourceActorPtr> &actors, std::ofstream &ofs);
void DumpKernelActors(const std::vector<KernelActorPtr> &actors, std::ofstream &ofs);
void DumpSuperKernelActors(const std::vector<SuperKernelActorPtr> &actors, std::ofstream &ofs);
// No-input actors are dispatched by runtime type to the kernel/super kernel dumpers.
void DumpNoInputKernelActors(const std::vector<AbstractActorPtr> &actors, std::ofstream &ofs);
void DumpCopyActors(const std::vector<CopyActorPtr> &actors, std::ofstream &ofs);
void DumpGatherActors(const std::vector<GatherActorPtr> &actors, std::ofstream &ofs);
void DumpSwitchActors(const std::vector<SwitchActorPtr> &actors, std::ofstream &ofs);
} // namespace runtime
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_DUMP_H_

View File

@ -48,6 +48,8 @@ class CopyActor : public MemoryAwareActor {
// The copy processing after memory alloc finished.
void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;
const DeviceTensorPtr &output() const { return output_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;
void UpdateOutputData(OpData<DeviceTensor> *const output_data, const DataArrow *data_arrow,

View File

@ -61,6 +61,10 @@ class DataPrepareActor : public DebugAwareActor {
void SendMemoryAllocReq(OpContext<DeviceTensor> *const context) override;
void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;
const std::map<std::pair<CNodePtr, DeviceContext *>, std::pair<bool, bool>> &continuous_memory_nodes() const {
return continuous_memory_nodes_;
}
private:
friend class GraphScheduler;

View File

@ -89,6 +89,8 @@ class DeviceQueueDataSourceActor : public DataSourceActor {
void SendDebugReq(OpContext<DeviceTensor> *const context) override;
void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
const CNodePtr &data_kernel() const { return data_kernel_; }
protected:
void FillDataBuffer() override;
void SendRecorderInfo(OpContext<DeviceTensor> *const context) const override;

View File

@ -73,6 +73,8 @@ class KernelActor : public DebugAwareActor {
// The callback after debug finished.
void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
const CNodePtr &kernel() const { return kernel_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;
void SendRecorderInfo(OpContext<DeviceTensor> *const context) const override;

View File

@ -51,6 +51,11 @@ class LoopCountActor : public DebugAwareActor {
// The callback after debug finished.
void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
// Get the member.
size_t loop_count() const { return loop_count_; }
const AID &data_prepare_aid() const { return data_prepare_aid_; }
const AID &output_aid() const { return output_aid_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;
void SendOutput(OpContext<DeviceTensor> *const context) override;

View File

@ -66,6 +66,10 @@ class OutputActor : public AbstractActor {
// context of tensor be rewritten in the next step or next loop.
void UpdateOutputDeviceAddress();
// Get the member.
size_t loop_count() const { return loop_count_; }
size_t outputs_num() const { return outputs_num_; }
const std::vector<AID> &input_result_arrow_aids() const { return input_result_arrow_aids_; }
std::vector<TensorPtr> &outputs() { return outputs_; }
private:

View File

@ -42,6 +42,8 @@ class SuperKernelActor : public DebugAwareActor {
void Init() override;
const KernelGraphPtr &graph() const { return graph_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;

View File

@ -1479,7 +1479,7 @@ void GraphScheduler::LinkOutputResultArrowForOutputActor(OutputActor *to_actor,
}
auto op_arrow = std::make_shared<DataArrow>(output_with_index.second, to_actor->GetAID(), output_position);
(void)from_actor->output_result_arrows_.emplace_back(op_arrow);
(void)from_actor->output_nodes_.emplace_back(output_with_index.first);
(void)from_actor->output_result_nodes_.emplace_back(output_with_index.first);
(void)to_actor->input_result_arrow_aids_.emplace_back(from_actor->GetAID());
// Update the real compute node in the host data source actor.
@ -1488,7 +1488,7 @@ void GraphScheduler::LinkOutputResultArrowForOutputActor(OutputActor *to_actor,
MS_EXCEPTION_IF_NULL(host_queue_ds_actor);
auto position = host_queue_ds_actor->FetchNodePosition(output_with_index.first);
auto real_node = host_queue_ds_actor->FetchNode(position);
from_actor->output_nodes_[from_actor->output_nodes_.size() - 1] = real_node;
from_actor->output_result_nodes_[from_actor->output_result_nodes_.size() - 1] = real_node;
UpdateRefCount(real_node, output_with_index.second, true);
}
}
@ -1757,277 +1757,22 @@ void GraphScheduler::DumpActor(const ActorSet *actor_set, const GraphCompilerInf
return;
}
ofs << "[Device tensor stores]\n";
DumpDeviceTensorStore(graph_compiler_info, ofs);
const auto &data_prepare_actor = actor_set->data_prepare_actor_;
ofs << "\n\n[Data prepare actor:" << (data_prepare_actor != nullptr ? 1 : 0) << "]\n";
if (data_prepare_actor != nullptr) {
DumpDataPrepareActor(data_prepare_actor.get(), ofs);
}
ofs << "\n\n[Data source actors:" << actor_set->data_source_actors_.size() << "]\n";
for (const auto &data_source_actor : actor_set->data_source_actors_) {
DumpDSActor(data_source_actor.get(), ofs);
}
ofs << "\n\n[Kernel actors:" << actor_set->kernel_actors_.size() << "]\n";
for (const auto &kernel_actor : actor_set->kernel_actors_) {
DumpKernelActor(kernel_actor.get(), ofs);
}
ofs << "\n\n[Super kernel actors:" << actor_set->super_kernel_actors_.size() << "]\n";
for (const auto &super_kernel_actor : actor_set->super_kernel_actors_) {
DumpSuperKernelActor(super_kernel_actor.get(), ofs);
}
ofs << "\n\n[No input kernel actors:" << actor_set->no_input_kernel_actors_.size() << "]\n";
for (const auto &no_input_kernel_actor : actor_set->no_input_kernel_actors_) {
DumpNoInputKernelActor(no_input_kernel_actor.get(), ofs);
}
ofs << "\n\n[Copy actors:" << actor_set->copy_actors_.size() << "]\n";
for (const auto &copy_actor : actor_set->copy_actors_) {
DumpCopyActor(copy_actor.get(), ofs);
}
ofs << "\n\n[Gather actors:" << actor_set->gather_actors_.size() << "]\n";
for (const auto &gather_actor : actor_set->gather_actors_) {
DumpGatherActor(gather_actor.get(), ofs);
}
ofs << "\n\n[Switch actors:" << actor_set->switch_actors_.size() << "]\n";
for (const auto &switch_actor : actor_set->switch_actors_) {
DumpSwitchActor(switch_actor.get(), ofs);
}
const auto &loop_count_actor = actor_set->loop_count_actor_;
ofs << "\n\n[Loop count actor:" << (loop_count_actor != nullptr ? 1 : 0) << "]\n";
if (loop_count_actor != nullptr) {
DumpLoopCountActor(loop_count_actor.get(), ofs);
}
const auto &output_actor = actor_set->output_actor_;
ofs << "\n\n[Output actor:" << (output_actor != nullptr ? 1 : 0) << "]\n";
if (output_actor != nullptr) {
DumpOutputActor(output_actor.get(), ofs);
}
}
void GraphScheduler::DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
if (actor->device_contexts_.size() > 0) {
ofs << "\t\tdevice_contexts:" << actor->device_contexts_.size() << "\n ";
for (const auto &device_context : actor->device_contexts_) {
if (device_context == nullptr) {
ofs << "\t\t\tdevice_context:" << device_context << "\n";
continue;
}
ofs << "\t\t\tdevice_context:" << device_context->device_context_key().ToString() << "\n";
}
}
if (actor->device_tensor_store_keys_.size() > 0) {
ofs << "\t\tdevice_tensor_store_keys:" << actor->device_tensor_store_keys_.size() << "\n ";
for (const auto &device_tensor_store_key : actor->device_tensor_store_keys_) {
MS_EXCEPTION_IF_NULL(device_tensor_store_key.second);
ofs << "\t\t\tto_input_index:" << device_tensor_store_key.first
<< "\tfrom_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n";
}
}
if (actor->input_data_arrow_aids_.size() > 0) {
ofs << "\t\tinput_data_arrow_actors:" << actor->input_data_arrow_aids_.size() << "\n ";
for (const auto &input_data_arrow_aid : actor->input_data_arrow_aids_) {
ofs << "\t\t\tfrom_actor_name:" << input_data_arrow_aid.Name() << "\n";
}
}
if (actor->input_control_arrow_aids_.size() > 0) {
ofs << "\t\tinput_control_arrow_actors:" << actor->input_control_arrow_aids_.size() << "\n ";
for (const auto &input_control_arrow_aid : actor->input_control_arrow_aids_) {
ofs << "\t\t\tfrom_actor_name:" << input_control_arrow_aid.Name() << "\n";
}
}
const auto &output_data_arrows = actor->output_data_arrows();
if (output_data_arrows.size() > 0) {
ofs << "\t\toutput_data_arrows:" << output_data_arrows.size() << "\n ";
for (const auto &data_arrow : output_data_arrows) {
MS_EXCEPTION_IF_NULL(data_arrow);
ofs << "\t\t\tfrom_output_index:" << data_arrow->from_output_index_
<< "\tto_actor_name:" << data_arrow->to_op_id_.Name() << "\tto_input_index:" << data_arrow->to_input_index_
<< "\n";
}
}
const auto &output_control_arrows = actor->output_control_arrows();
if (output_control_arrows.size() > 0) {
ofs << "\t\toutput_control_arrows:" << output_control_arrows.size() << "\n ";
for (const auto &aid : output_control_arrows) {
ofs << "\t\t\tto_actor_name:" << aid.Name() << "\n";
}
}
if (actor->output_result_arrows_.size() != actor->output_nodes_.size()) {
MS_LOG(EXCEPTION) << "The size of output result arrows is not equal to the output nodes.";
}
if (actor->output_result_arrows_.size() > 0) {
ofs << "\t\toutput_result_arrows:" << actor->output_result_arrows_.size() << "\n ";
for (size_t i = 0; i < actor->output_result_arrows_.size(); ++i) {
auto result_arrow = actor->output_result_arrows_[i];
auto output_node = actor->output_nodes_[i];
MS_EXCEPTION_IF_NULL(result_arrow);
MS_EXCEPTION_IF_NULL(output_node);
ofs << "\t\t\tfrom_output_node:" << output_node->fullname_with_scope()
<< "\tfrom_output_index:" << result_arrow->from_output_index_
<< "\tto_actor_name:" << result_arrow->to_op_id_.Name()
<< "\toutput_node_position:" << result_arrow->to_input_index_ << "\n";
}
}
}
void GraphScheduler::DumpDataPrepareActor(const DataPrepareActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
DumpAbstractActor(actor, ofs);
ofs << "\t\tcontinuous_memory_nodes:" << actor->continuous_memory_nodes_.size() << "\n ";
for (const auto &iter : actor->continuous_memory_nodes_) {
MS_EXCEPTION_IF_NULL(iter.first.first);
MS_EXCEPTION_IF_NULL(iter.first.second);
ofs << "\t\t\tnode_name:" << iter.first.first->fullname_with_scope()
<< "\tdevice_context:" << iter.first.second->device_context_key().ToString()
<< "\tis_input_need:" << iter.second.first << "\tis_output_need:" << iter.second.second << "\n";
}
}
void GraphScheduler::DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
const auto &actor_name = actor->GetAID().Name();
ofs << "\tactor_name:" << actor_name << "\n";
if (actor->type_ == KernelTransformType::kDeviceDataSourceActor) {
// Dump the member info of device queue data source actor.
const auto &device_queue_ds_actor = dynamic_cast<const DeviceQueueDataSourceActor *>(actor);
MS_EXCEPTION_IF_NULL(device_queue_ds_actor);
const auto &data_kernel = device_queue_ds_actor->data_kernel_;
MS_EXCEPTION_IF_NULL(data_kernel);
ofs << "\t\tdata_kernel_name:" << data_kernel->fullname_with_scope()
<< "\tinput_number:" << AnfAlgo::GetInputTensorNum(data_kernel)
<< "\toutput_number:" << AnfAlgo::GetOutputTensorNum(data_kernel) << "\n";
for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(data_kernel); ++i) {
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_kernel, i, false);
MS_EXCEPTION_IF_NULL(device_tensor);
ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
}
} else if (actor->type_ == KernelTransformType::kHostDataSourceActor) {
// Dump the member info of host queue data source actor.
const auto &host_queue_ds_actor = dynamic_cast<const HostQueueDataSourceActor *>(actor);
MS_EXCEPTION_IF_NULL(host_queue_ds_actor);
ofs << "\t\tdata_nodes:" << host_queue_ds_actor->data_nodes_.size() << "\n";
for (size_t i = 0; i < host_queue_ds_actor->data_nodes_.size(); ++i) {
const auto &data_node = host_queue_ds_actor->data_nodes_[i];
MS_EXCEPTION_IF_NULL(data_node);
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_node, 0, false);
MS_EXCEPTION_IF_NULL(device_tensor);
ofs << "\t\t\tnode_order_number:" << i << "\tnode_name:" << data_node->fullname_with_scope()
<< "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n";
}
}
DumpAbstractActor(actor, ofs);
ofs << "\n";
}
void GraphScheduler::DumpLoopCountActor(const LoopCountActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_ << "\n";
DumpAbstractActor(actor, ofs);
const size_t kOutputControlArrowsNum = 2;
ofs << "\t\toutput_control_arrows:" << kOutputControlArrowsNum << "\n ";
ofs << "\t\t\tto_actor_name:" << actor->output_aid_.Name() << "\n";
ofs << "\t\t\tto_actor_name:" << actor->data_prepare_aid_.Name() << "\n";
}
void GraphScheduler::DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
const auto &kernel = actor->kernel_;
MS_EXCEPTION_IF_NULL(kernel);
ofs << "\t\tkernel_name:" << kernel->fullname_with_scope() << "\tinputs_num:" << AnfAlgo::GetInputTensorNum(kernel)
<< "\toutputs_num:" << AnfAlgo::GetOutputTensorNum(kernel) << "\n";
for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(kernel); ++i) {
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(kernel, i, false);
MS_EXCEPTION_IF_NULL(device_tensor);
ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
}
DumpAbstractActor(actor, ofs);
ofs << "\n";
}
void GraphScheduler::DumpSuperKernelActor(const SuperKernelActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
const auto &graph = actor->graph_;
MS_EXCEPTION_IF_NULL(graph);
ofs << "\t\tgraph_id:" << graph->graph_id() << "\tgraphl_name:" << graph->ToString()
<< "\tis_sink:" << graph->is_sink() << "\tinputs_num:" << (graph->input_nodes()).size()
<< "\tkernels_num:" << (graph->execution_order()).size() << "\n";
DumpAbstractActor(actor, ofs);
ofs << "\n";
}
void GraphScheduler::DumpNoInputKernelActor(const AbstractActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
if (actor->type_ == KernelTransformType::kKernelActor) {
auto kernel_actor = dynamic_cast<const KernelActor *>(actor);
MS_EXCEPTION_IF_NULL(kernel_actor);
DumpKernelActor(kernel_actor, ofs);
} else if (actor->type_ == KernelTransformType::kSuperKernelActor) {
auto super_kernel_actor = dynamic_cast<const SuperKernelActor *>(actor);
MS_EXCEPTION_IF_NULL(super_kernel_actor);
DumpSuperKernelActor(super_kernel_actor, ofs);
}
}
void GraphScheduler::DumpOutputActor(const OutputActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_
<< "\toutputs_num:" << actor->outputs_num_ << "\n";
DumpAbstractActor(actor, ofs);
ofs << "\t\tinput_result_arrows:" << actor->input_result_arrow_aids_.size() << "\n ";
for (const auto &input_result_arrow_aid : actor->input_result_arrow_aids_) {
ofs << "\t\t\tfrom_actor_name:" << input_result_arrow_aid.Name() << "\n";
}
}
void GraphScheduler::DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
auto device_tensor = actor->output_;
if (device_tensor != nullptr) {
ofs << "\t\toutput_index:" << 0 << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
}
DumpAbstractActor(actor, ofs);
ofs << "\n";
DumpDataPrepareActor(actor_set->data_prepare_actor_, ofs);
DumpDSActors(actor_set->data_source_actors_, ofs);
DumpKernelActors(actor_set->kernel_actors_, ofs);
DumpSuperKernelActors(actor_set->super_kernel_actors_, ofs);
DumpNoInputKernelActors(actor_set->no_input_kernel_actors_, ofs);
DumpCopyActors(actor_set->copy_actors_, ofs);
DumpGatherActors(actor_set->gather_actors_, ofs);
DumpSwitchActors(actor_set->switch_actors_, ofs);
DumpLoopCountActor(actor_set->loop_count_actor_, ofs);
DumpOutputActor(actor_set->output_actor_, ofs);
}
void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const {
ofs << "[Device tensor stores]\n";
for (const auto &graph : graph_compiler_info.graphs_) {
MS_EXCEPTION_IF_NULL(graph);
ofs << "\tgraph_id:" << graph->graph_id() << "\tis_sink:" << graph->is_sink()
@ -2076,15 +1821,5 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil
ofs << "\n";
}
}
// Dump the debug information of a gather actor; only its name is recorded.
void GraphScheduler::DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) const {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &actor_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << actor_name << '\n';
}
// Dump the debug information of a switch actor; only its name is recorded.
void GraphScheduler::DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) const {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &actor_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << actor_name << '\n';
}
} // namespace runtime
} // namespace mindspore

View File

@ -28,15 +28,7 @@
#include <algorithm>
#include <fstream>
#include "runtime/framework/graph_compiler.h"
#include "runtime/framework/actor/data_prepare_actor.h"
#include "runtime/framework/actor/data_source_actor.h"
#include "runtime/framework/actor/loop_count_actor.h"
#include "runtime/framework/actor/kernel_actor.h"
#include "runtime/framework/actor/super_kernel_actor.h"
#include "runtime/framework/actor/output_actor.h"
#include "runtime/framework/actor/copy_actor.h"
#include "runtime/framework/actor/control_flow/switch_actor.h"
#include "runtime/framework/actor/control_flow/gather_actor.h"
#include "runtime/framework/actor/actor_dump.h"
#include "thread/actor_threadpool.h"
namespace mindspore {
@ -223,17 +215,6 @@ class GraphScheduler {
// Display the actor information of corresponding kernel graph.
void DumpActor(const ActorSet *actor_set, const GraphCompilerInfo &graph_compiler_info) const;
void DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) const;
void DumpDataPrepareActor(const DataPrepareActor *actor, std::ofstream &ofs) const;
void DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) const;
void DumpLoopCountActor(const LoopCountActor *actor, std::ofstream &ofs) const;
void DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const;
void DumpSuperKernelActor(const SuperKernelActor *actor, std::ofstream &ofs) const;
void DumpNoInputKernelActor(const AbstractActor *actor, std::ofstream &ofs) const;
void DumpOutputActor(const OutputActor *actor, std::ofstream &ofs) const;
void DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) const;
void DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) const;
void DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) const;
void DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const;
// The global maps, only be cleared in the deconstruction.

View File

@ -365,6 +365,7 @@ MindRTBackend::MindRTBackend(const std::string &backend_name, const std::string
const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(graph_compiler_);
MS_EXCEPTION_IF_NULL(func_graph);
MS_LOG(INFO) << "Status record: start compile function graph: " << func_graph->ToString();
auto root_graph = WrapPrimitives(func_graph);
MS_EXCEPTION_IF_NULL(root_graph);
root_graph_ = root_graph.get();
@ -403,6 +404,8 @@ const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(graph_compiler_info);
const ActorInfo &actor_info = graph_compiler_info->name_;
(void)actor_to_graph_compiler_info_.emplace(graph_compiler_info->name_, std::move(graph_compiler_info));
MS_LOG(INFO) << "Status record: end compile function graph: " << func_graph->ToString()
<< ", produce actor: " << actor_info;
return actor_info;
}
@ -787,7 +790,6 @@ void MindRTBackend::RunGraphBySingleOp(const std::vector<KernelGraphPtr> &graphs
}
void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs) {
MS_LOG(INFO) << "Run actor begin, actor name: " << actor_info;
MS_EXCEPTION_IF_NULL(root_graph_);
if (IsGraphOutputValueNodeOrParameter(root_graph_->output(), args, outputs)) {
return;
@ -800,6 +802,7 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
return;
}
MS_LOG(INFO) << "Status record: start run actor: " << actor_info;
// Fetch the graph compiler info.
const auto &graph_iter = actor_to_graph_compiler_info_.find(actor_info);
if (graph_iter == actor_to_graph_compiler_info_.end()) {
@ -837,6 +840,7 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
// There will be more than one kernel graph in heterogeneous scenario in a ms function of PyNative Mode.
if (real_execution_mode_ == kPynativeMode) {
RunGraphBySingleOp(graph_compiler_info.graphs_, input_tensors, outputs);
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
return;
}
// Run actor DAG.
@ -875,7 +879,7 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
// Update device address for output node of graph.
actor_set->output_actor_->UpdateOutputDeviceAddress();
MS_LOG(INFO) << "Run actor end, actor name: " << actor_info;
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
}
void MindRTBackend::ConstructOutputs(const AnfNodePtr &output_node,