!19117 SetDebugger for MindRTBackend and clean up debug_actor code

Merge pull request !19117 from parastooashtari/new_unified_gpu
i-robot 2021-06-30 09:25:54 +00:00 committed by Gitee
commit bde38a582c
7 changed files with 207 additions and 138 deletions

View File

@@ -42,6 +42,7 @@ if(ENABLE_DEBUGGER)
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/proto_exporter.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/tensor_summary.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/debug_services.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/debugger_utils.cc"
)
endif()
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")

View File

@@ -236,9 +236,6 @@ bool Debugger::CheckDebuggerDumpEnabled() const {
// see if dump is enabled
if (device_target_ == kGPUDevice) {
return device::KernelRuntime::DumpDataEnabled();
} else if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
auto &dump_json_parser = DumpJsonParser::GetInstance();
return dump_json_parser.e2e_dump_enabled();
}
return false;
}

View File

@@ -0,0 +1,159 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/debugger/debugger_utils.h"
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include "debug/debugger/debugger.h"
#include "runtime/device/gpu/gpu_device_address.h"
#include "debug/data_dump/dump_json_parser.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/kernel.h"
using mindspore::kernel::AddressPtr;
using mindspore::kernel::KernelLaunchInfo;
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
using KernelGraph = mindspore::session::KernelGraph;
using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
namespace mindspore {
static const size_t PARAMETER_OUTPUT_INDEX = 0;
std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
// Define a vector containing the indices of the real outputs.
std::vector<int> real_outputs;
// P.BatchNorm is used for both training and inference;
// more operators can be added to this filter list as needed.
if (node_name == "BatchNorm") {
MS_LOG(INFO) << "Loading node named " << node_name;
real_outputs.insert(real_outputs.end(), {0, 3, 4});
} else {
// by default, TensorLoader will load all outputs
for (size_t j = 0; j < output_size; ++j) {
real_outputs.push_back(j);
}
}
return real_outputs;
}
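// Illustrative sketch only, with hypothetical calls (not part of the commit):
//   CheckRealOutput("BatchNorm", 5);  // returns {0, 3, 4}; slots 1 and 2 are skipped
//   CheckRealOutput("Conv2D", 2);     // returns {0, 1}; the default path loads all outputs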
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
// get inputs
auto kernel_inputs = launch_info_->inputs_;
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = cnode->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
auto addr = kernel_inputs[j];
auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
// Skip inputs with no inferred type; this happens, for example, with the Depend op
if (type == kMetaTypeNone) {
continue;
}
#ifdef ENABLE_GPU
auto format = kOpFormat_DEFAULT;
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
string input_tensor_name = input_kernel_name + ':' + "0";
ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost failed: tensor_name:" << input_tensor_name << ", host_format:" << format << ".";
}
#endif
}
}
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
// get outputs
auto kernel_outputs = launch_info_->outputs_;
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
auto node_name = AnfAlgo::GetCNodeName(cnode);
std::string kernel_name = cnode->fullname_with_scope();
std::vector<int> real_outputs = CheckRealOutput(node_name, output_size);
for (int j : real_outputs) {
auto addr = kernel_outputs[j];
auto type = AnfAlgo::GetOutputInferDataType(cnode, j);
// Skip outputs with no inferred type; this happens, for example, with the Depend op
if (type == kMetaTypeNone) {
continue;
}
#ifdef ENABLE_GPU
auto format = kOpFormat_DEFAULT;
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost failed: tensor_name:" << tensor_name << ", host_format:" << format << ".";
}
#endif
}
}
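// Naming sketch: tensors are registered as "<fullname_with_scope>:<slot>".
// With hypothetical scope names, LoadInputs would store a tensor such as
// "Default/network/Conv2D-op1:0" (inputs always use slot 0 of the producing
// node), while LoadOutputs would store, e.g., "Default/network/BatchNorm-op2:3".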
bool CheckReadData(const CNodePtr &cnode) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
return false;
}
bool read_data = false;
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
std::string kernel_name = cnode->fullname_with_scope();
if (dump_enabled) {
auto dump_mode = dump_json_parser.dump_mode();
// Dump the node if dump_mode is 0 (dump all kernels), or if dump_mode is 1 and this kernel is in the kernels list
if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
read_data = true;
}
} else if (debugger->debugger_enabled()) {
read_data = debugger->ReadNodeDataRequired(cnode);
}
return read_data;
}
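// Decision sketch for CheckReadData, assuming a hypothetical dump configuration:
//   dump_mode 0                          -> every kernel is read
//   dump_mode 1, and the kernels list
//   contains this node's full name       -> only listed kernels are read
//   dump disabled, debugger enabled      -> the online debugger decides per node
//                                           via ReadNodeDataRequired(cnode)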
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
return;
}
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
LoadInputs(cnode, launch_info_, exec_order_);
}
if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
LoadOutputs(cnode, launch_info_, exec_order_);
}
// Dump kernel
if (dump_enabled) {
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
auto graph_id = kernel_graph->graph_id();
debugger->DumpSingleNode(cnode, graph_id);
// Clear dumped data when the online debugger is not enabled
if (!debugger->debugger_enabled()) {
debugger->ClearCurrentData();
}
}
// Check whether this node is the last kernel
bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
debugger->PostExecuteNode(cnode, last_kernel);
}
} // namespace mindspore
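Together these helpers give callers a two-step pattern: ask CheckReadData whether the kernel's tensors are needed at all, then let ReadDataAndDump load and dump them. A minimal call-site sketch, assuming a valid cnode, a populated KernelLaunchInfo named launch_info, and an execution-order counter exec_order (DebugActor::Debug below follows the same shape):

auto debugger = Debugger::GetInstance();
if (debugger) {
  debugger->SetCurNode(cnode->fullname_with_scope());
  if (CheckReadData(cnode)) {
    ReadDataAndDump(cnode, &launch_info, exec_order);
  }
}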

View File

@@ -0,0 +1,37 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_UTILS_H_
#define MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_UTILS_H_
#include <iostream>
#include <vector>
#include <string>
#include "debug/debugger/debugger.h"
#include "backend/kernel_compiler/kernel.h"
using mindspore::kernel::KernelLaunchInfo;
namespace mindspore {
std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &output_size);
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
bool CheckReadData(const CNodePtr &cnode);
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
} // namespace mindspore
#endif  // MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_UTILS_H_

View File

@@ -21,138 +21,14 @@
#include "runtime/framework/actor/debug_aware_actor.h"
#include "mindrt/include/async/async.h"
#include "utils/log_adapter.h"
#ifdef ENABLE_GPU
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#include "runtime/device/gpu/gpu_device_address.h"
using mindspore::kernel::AddressPtr;
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
using KernelGraph = mindspore::session::KernelGraph;
#include "debug/debugger/debugger_utils.h"
#endif
namespace mindspore {
namespace runtime {
#ifdef ENABLE_GPU
static const size_t PARAMETER_OUTPUT_INDEX = 0;
std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
// define a vector containing real output number
std::vector<int> real_outputs;
// P.BatchNorm is used for training and inference
// can add the filter list for more operators here....
if (node_name == "BatchNorm") {
MS_LOG(INFO) << "loading node named " << node_name;
real_outputs.insert(real_outputs.end(), {0, 3, 4});
} else {
// by default, TensorLoader will load all outputs
for (size_t j = 0; j < output_size; ++j) {
real_outputs.push_back(j);
}
}
return real_outputs;
}
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
// get inputs
auto kernel_inputs = launch_info_->inputs_;
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
for (size_t j = 0; j < input_size; ++j) {
auto input_kernel = cnode->input(j + 1);
std::string input_kernel_name = input_kernel->fullname_with_scope();
auto addr = kernel_inputs[j];
auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
// For example, this happens with the Depend op
if (type == kMetaTypeNone) {
continue;
}
auto format = kOpFormat_DEFAULT;
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
string input_tensor_name = input_kernel_name + ':' + "0";
ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
}
}
}
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
// get outputs
auto kernel_outputs = launch_info_->outputs_;
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
auto node_name = AnfAlgo::GetCNodeName(cnode);
std::string kernel_name = cnode->fullname_with_scope();
std::vector<int> real_outputs = CheckRealOutput(node_name, output_size);
for (int j : real_outputs) {
auto addr = kernel_outputs[j];
auto type = AnfAlgo::GetOutputInferDataType(cnode, j);
// For example, this happens with the Depend op
if (type == kMetaTypeNone) {
continue;
}
auto format = kOpFormat_DEFAULT;
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
}
}
}
bool CheckReadData(const CNodePtr &cnode) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
return false;
}
bool read_data = false;
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
std::string kernel_name = cnode->fullname_with_scope();
if (dump_enabled) {
auto dump_mode = dump_json_parser.dump_mode();
// dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list
if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
read_data = true;
}
} else if (debugger->debugger_enabled()) {
read_data = debugger->ReadNodeDataRequired(cnode);
}
return read_data;
}
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
return;
}
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
LoadInputs(cnode, launch_info_, exec_order_);
}
if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
LoadOutputs(cnode, launch_info_, exec_order_);
}
// Dump kernel
if (dump_enabled) {
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
auto graph_id = kernel_graph->graph_id();
debugger->DumpSingleNode(cnode, graph_id);
// Clear Dumped data when online debugger is not enabled
if (!debugger->debugger_enabled()) {
debugger->ClearCurrentData();
}
}
// check if the node is last kernel
bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
debugger->PostExecuteNode(cnode, last_kernel);
}
#endif
void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_,
const DeviceContext *device_context, OpContext<DeviceTensor> *op_context, const AID *from_aid) {
MS_EXCEPTION_IF_NULL(node);
@@ -160,14 +36,12 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_,
MS_EXCEPTION_IF_NULL(op_context);
MS_EXCEPTION_IF_NULL(from_aid);
// TODO: debug.
MS_LOG(INFO) << "DebugActor is called";
#ifdef ENABLE_GPU
#ifdef ENABLE_DEBUGGER
if (node->isa<CNode>()) {
const auto &cnode = node->cast<CNodePtr>();
auto debugger = Debugger::GetInstance();
if (debugger) {
std::string kernel_name = cnode->fullname_with_scope();
MS_LOG(INFO) << "kernel_name is " << kernel_name;
debugger->SetCurNode(kernel_name);
bool read_data = CheckReadData(cnode);
if (read_data) {
@@ -185,8 +59,7 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *op_context, const AID *from_aid) {
MS_EXCEPTION_IF_NULL(op_context);
MS_EXCEPTION_IF_NULL(from_aid);
// TODO: debug.
MS_LOG(INFO) << "DebugActor::DebugOnStepEnd is called";
#ifdef ENABLE_GPU
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
if (debugger) {
debugger->Debugger::UpdateStepNumGPU();

View File

@@ -285,7 +285,9 @@ MindRTBackend::MindRTBackend(const std::string &backend_name, const std::string &device_name, uint32_t device_id)
device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name, device_id});
device_context->Initialize();
device_id_ = device_context->device_context_key().device_id_;
#ifdef ENABLE_DEBUGGER
SetDebuggerInit();
#endif
runtime::GraphScheduler::GetInstance().Initialize();
}
@@ -688,7 +690,7 @@
}
#ifdef ENABLE_DEBUGGER
void MindRTBackend::SetDebugger() {
void MindRTBackend::SetDebuggerInit() {
auto debugger_ = Debugger::GetInstance();
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
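A hedged sketch of the renamed hook's full shape, assuming the truncated body goes on to forward the device id and target to the debugger singleton through a Debugger::Init(device_id, device_target) entry point (an assumption, not shown in this hunk):

void MindRTBackend::SetDebuggerInit() {
  auto debugger_ = Debugger::GetInstance();
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  // Assumed continuation: hand the device identity to the debugger.
  debugger_->Init(device_id_, ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET));
}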

View File

@@ -120,7 +120,7 @@ class MindRTBackend : public Backend {
void RunGraph(const ActorInfo &actor_info, OpRunInfo *op_run_info, const std::vector<int64_t> *tensors_mask,
const std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs);
#ifdef ENABLE_DEBUGGER
void SetDebugger() override;
void SetDebuggerInit();
#endif
private: