use launch_info to load inputs and outputs in mindRT
parent 19c3370c46
commit 5c8d48d809
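In brief: the MindRT debug path previously built device::gpu::GPUDeviceAddress objects directly, guarded by #ifdef ENABLE_GPU, and kept a separate ReadDataAndDumpAscend path. This commit threads the DeviceContext from DebugActor into LoadInputs/LoadOutputs/ReadDataAndDump so device addresses come from device_context->CreateDeviceAddress, letting GPU and Ascend share one kernel-by-kernel load path and retiring ReadDataAndDumpAscend, Debugger::LoadNodeOutputs, and Debugger::DumpDataEnabledIteration. A minimal standalone sketch of the pattern (all classes below are illustrative mock-ups, not MindSpore's):

// Sketch: an abstract DeviceContext factory lets one debugger load path
// serve both GPU and Ascend, with no GPU-specific address type or #ifdef.
#include <iostream>
#include <memory>
#include <string>

struct DeviceAddress {
  virtual ~DeviceAddress() = default;
  // Copies one tensor from device memory to host; returns false on failure.
  virtual bool LoadMemToHost(const std::string &tensor_name) const = 0;
};

struct GpuAddress : DeviceAddress {
  bool LoadMemToHost(const std::string &n) const override {
    std::cout << "GPU load " << n << '\n';
    return true;
  }
};

struct AscendAddress : DeviceAddress {
  bool LoadMemToHost(const std::string &n) const override {
    std::cout << "Ascend load " << n << '\n';
    return true;
  }
};

// The abstraction the commit threads through LoadInputs/LoadOutputs.
struct DeviceContext {
  virtual ~DeviceContext() = default;
  virtual std::unique_ptr<DeviceAddress> CreateDeviceAddress() const = 0;
};

struct GpuContext : DeviceContext {
  std::unique_ptr<DeviceAddress> CreateDeviceAddress() const override {
    return std::make_unique<GpuAddress>();
  }
};

struct AscendContext : DeviceContext {
  std::unique_ptr<DeviceAddress> CreateDeviceAddress() const override {
    return std::make_unique<AscendAddress>();
  }
};

// One load path for every backend: no #ifdef ENABLE_GPU, no GPUDeviceAddress.
void ReadDataAndDump(const DeviceContext *ctx, const std::string &tensor_name) {
  auto addr = ctx->CreateDeviceAddress();
  if (!addr->LoadMemToHost(tensor_name)) {
    std::cerr << "LoadMemToHost failed for " << tensor_name << '\n';
  }
}

int main() {
  GpuContext gpu;
  AscendContext ascend;
  ReadDataAndDump(&gpu, "Conv2D-op1:0");     // GPU path
  ReadDataAndDump(&ascend, "Conv2D-op1:0");  // Ascend path
}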
@@ -252,6 +252,14 @@ const void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor
 }
 #endif

+/*
+ * Feature group: Offline debugger, Online debugger.
+ * Target device group: Ascend, GPU.
+ * Runtime category: Old runtime, MindRT.
+ * Description: Goes through all the watchpoints in the watchpoint table. If the current tensor is in the list of
+ * check_nodes, that watchpoint is added to the vector of watchpoint_to_check (vector of watchpoints that should be
+ * checked for the current tensor).
+ */
 void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck,
                                           const std::shared_ptr<TensorData> &tensor, bool *previous_iter_tensor_needed,
                                           std::string *const qualified_tensor_name,
@@ -453,25 +453,6 @@ void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
   }
 }

-/*
- * Feature group: Dump.
- * Target device group: Ascend, GPU.
- * Runtime category: Old runtime, MindRT.
- * Description: Returns true for e2e dump if dump is enabled for the current iteration.
- */
-bool Debugger::DumpDataEnabledIteration() const {
-  auto &dump_json_parser = DumpJsonParser::GetInstance();
-  if (!dump_json_parser.e2e_dump_enabled()) {
-    return false;
-  }
-
-  auto cur_iter = dump_json_parser.cur_dump_iter();
-  if (dump_json_parser.IsDumpIter(cur_iter)) {
-    return true;
-  }
-  return false;
-}
-
 /*
  * Feature group: Dump.
  * Target device group: Ascend, GPU.
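The deleted helper above gated e2e dump on the current iteration; later hunks replace its call sites with DumpJsonParser::DumpEnabledForIter(). A condensed sketch of the equivalent check (the parser is mocked and the real DumpEnabledForIter body is assumed, since this diff does not show it):

#include <cstdint>
#include <iostream>
#include <set>

// Mock of DumpJsonParser: just enough state to express the iteration gate.
class MockDumpParser {
 public:
  bool e2e_dump_enabled() const { return e2e_enabled_; }
  uint32_t cur_dump_iter() const { return cur_iter_; }
  bool IsDumpIter(uint32_t iter) const { return iters_.count(iter) > 0; }
  // Assumed one-call equivalent of the deleted Debugger::DumpDataEnabledIteration().
  bool DumpEnabledForIter() const { return e2e_dump_enabled() && IsDumpIter(cur_dump_iter()); }

 private:
  bool e2e_enabled_ = true;
  uint32_t cur_iter_ = 2;
  std::set<uint32_t> iters_ = {0, 2, 10};  // iterations configured for dump
};

int main() {
  MockDumpParser parser;
  std::cout << std::boolalpha << parser.DumpEnabledForIter() << '\n';  // true
}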
@@ -1563,7 +1544,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
 }

 /*
- * Feature group: Dump.
+ * Feature group: Dump, Online debugger.
  * Target device group: Ascend, GPU.
  * Runtime category: Old runtime, MindRT.
  * Description: Load all the parameters and value nodes for the last loaded graph.
@@ -1588,7 +1569,7 @@ void Debugger::LoadParametersAndConst() {
 }

 /*
- * Feature group: Dump.
+ * Feature group: Dump, Online debugger.
  * Target device group: Ascend, GPU.
  * Runtime category: Old runtime, MindRT.
  * Description: Load all the parameters and value nodes for the given graph.
@@ -1658,47 +1639,6 @@ void Debugger::LoadGraphOutputs() {
   }
 }

-/*
- * Feature group: Dump.
- * Target device group: Ascend.
- * Runtime category: MindRT.
- * Description: Load a single node for kernel-by-kernel ascend mindRT dump.
- */
-void Debugger::LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id) {
-  if (device_target_ != kAscendDevice) {
-    return;
-  }
-
-  MS_EXCEPTION_IF_NULL(node);
-  std::string kernel_name = GetKernelNodeName(node);
-  auto output_size = AnfAlgo::GetOutputTensorNum(node);
-  if (partial_memory_) {
-    if (!debug_services_->IsWatchPoint(kernel_name, node)) {
-      return;
-    }
-  }
-  for (size_t j = 0; j < output_size; ++j) {
-    if (!AnfAlgo::OutputAddrExist(node, j)) {
-      MS_LOG(INFO) << "Cannot find output addr for slot " << j << " for " << kernel_name;
-      continue;
-    }
-    auto addr = AnfAlgo::GetOutputAddr(node, j);
-    MS_EXCEPTION_IF_NULL(addr);
-    auto type = AnfAlgo::GetOutputInferDataType(node, j);
-    if (!IsTypeDebuggerSupported(type)) {
-      return;
-    }
-    auto format = kOpFormat_DEFAULT;
-    string tensor_name = kernel_name + ':' + std::to_string(j);
-    ShapeVector int_shapes = trans::GetRuntimePaddingShape(node, j);
-    auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
-    if (!ret) {
-      MS_LOG(ERROR) << "LoadMemToHost:"
-                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
-    }
-  }
-}
-
 /*
  * Feature group: Online debugger.
  * Target device group: GPU.
@@ -1723,7 +1663,8 @@ void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
  * Description: Update step number when DebugActor::DebugOnStepEnd is called at the end of each step.
  */
 void Debugger::UpdateStepNumGPU() {
-  if (device_target_ == kGPUDevice && (debugger_enabled_ || DumpDataEnabledIteration())) {
+  auto &dump_json_parser = DumpJsonParser::GetInstance();
+  if (device_target_ == kGPUDevice && (debugger_enabled_ || dump_json_parser.DumpEnabledForIter())) {
     // access lock for public method
     std::lock_guard<std::mutex> a_lock(access_lock_);
     ++num_step_;
@@ -152,8 +152,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

   void LoadGraphOutputs();

-  void LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id);
-
   void CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);

   void LoadGraphs(const KernelGraphPtr &graph_ptr);
@@ -69,8 +69,8 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &
  * Runtime category: MindRT.
  * Description: Get kernel inputs from launch_info and load the inputs from device to host.
  */
-void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                uint32_t root_graph_id) {
+void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
+                const DeviceContext *device_context) {
   // get inputs
   auto kernel_inputs = launch_info->inputs_;
   auto input_size = AnfAlgo::GetInputTensorNum(cnode);
@@ -83,17 +83,17 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
     if (type == kMetaTypeNone) {
       continue;
     }
-#ifdef ENABLE_GPU
+
     auto format = kOpFormat_DEFAULT;
-    auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
+    auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
     string input_tensor_name = input_kernel_name + ':' + "0";
     ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
-    auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, root_graph_id);
+    auto ret =
+      device_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, root_graph_id);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
                     << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
     }
-#endif
   }
 }

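The loop body above names each slot "<kernel name>:<slot>" and loads it through an address created by the device context. A minimal sketch of that per-slot pattern (MockAddress and MockDeviceContext are invented stand-ins; only the naming convention and call shape mirror the diff):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

struct MockAddress {
  bool LoadMemToHost(const std::string &tensor_name, uint32_t exec_order) const {
    std::cout << "load " << tensor_name << " (exec " << exec_order << ")\n";
    return true;
  }
};

struct MockDeviceContext {
  MockAddress CreateDeviceAddress() const { return MockAddress{}; }
};

int main() {
  MockDeviceContext device_context;  // handed down by DebugActor in the real code
  const std::string kernel_name = "Default/Conv2D-op1";
  const std::size_t output_size = 2;
  for (std::size_t j = 0; j < output_size; ++j) {
    // Slot j of a kernel is addressed as "<kernel name>:<slot>".
    std::string tensor_name = kernel_name + ':' + std::to_string(j);
    auto addr = device_context.CreateDeviceAddress();
    if (!addr.LoadMemToHost(tensor_name, /*exec_order=*/0)) {
      std::cerr << "LoadMemToHost failed: " << tensor_name << '\n';
    }
  }
}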
@@ -104,7 +104,7 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
  * Description: Get kernel outputs from launch_info and load the outputs from device to host.
  */
 void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                 uint32_t root_graph_id) {
+                 uint32_t root_graph_id, const DeviceContext *device_context) {
   // get outputs
   auto kernel_outputs = launch_info->outputs_;
   auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
@@ -119,17 +119,16 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uin
     if (type == kMetaTypeNone) {
       continue;
     }
-#ifdef ENABLE_GPU
+
     auto format = kOpFormat_DEFAULT;
-    auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
+    auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
     string tensor_name = kernel_name + ':' + std::to_string(j);
     ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
-    auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
+    auto ret = device_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
                     << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
     }
-#endif
   }
 }

@@ -167,21 +166,23 @@ bool CheckReadData(const CNodePtr &cnode) {
  * Description: Load inputs and outputs of the given node if needed and dump them if dump is enabled, then it performs
  * PostExecuteNode function on the given node.
  */
-void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order) {
+void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
+                     const DeviceContext *device_context) {
   auto debugger = Debugger::GetInstance();
   if (!debugger) {
     return;
   }
   auto &dump_json_parser = DumpJsonParser::GetInstance();
-  bool dump_enabled = debugger->DumpDataEnabledIteration();
+  bool dump_enabled = dump_json_parser.DumpEnabledForIter();
   MS_LOG(DEBUG) << "dump_enabled: " << dump_enabled;
   auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   auto root_graph_id = kernel_graph->root_graph_id();
   if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
-    LoadInputs(cnode, launch_info, exec_order, root_graph_id);
+    LoadInputs(cnode, launch_info, exec_order, root_graph_id, device_context);
   }
   if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
-    LoadOutputs(cnode, launch_info, exec_order, root_graph_id);
+    LoadOutputs(cnode, launch_info, exec_order, root_graph_id, device_context);
   }
   // Dump kernel
   if (dump_enabled) {
@@ -198,37 +199,6 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
   debugger->PostExecuteNode(cnode, last_kernel);
 }

-/*
- * Feature group: Dump.
- * Target device group: Ascend.
- * Runtime category: MindRT.
- * Description: Load outputs of the given node and dump them if dump is enabled for Ascend kernel-by-kernel dump.
- */
-void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order) {
-  auto debugger = Debugger::GetInstance();
-  if (!debugger) {
-    return;
-  }
-  auto &dump_json_parser = DumpJsonParser::GetInstance();
-  bool dump_enabled = dump_json_parser.DumpEnabledForIter();
-  MS_LOG(DEBUG) << "dump_enabled: " << dump_enabled;
-  auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  auto root_graph_id = kernel_graph->root_graph_id();
-
-  debugger->LoadNodeOutputs(cnode, exec_order, root_graph_id);
-  // Dump kernel
-  if (dump_enabled) {
-    MS_EXCEPTION_IF_NULL(kernel_graph);
-    auto graph_id = kernel_graph->graph_id();
-    debugger->DumpSingleNode(cnode, graph_id);
-    // Clear Dumped data when online debugger is not enabled
-    if (!debugger->debugger_enabled()) {
-      debugger->ClearCurrentData();
-    }
-  }
-}
-
 /*
  * Feature group: Dump, Online Debugger.
  * Target device group: Ascend, GPU.
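With ReadDataAndDumpAscend deleted, both backends rely on the input/output gating inside the unified ReadDataAndDump above: inputs and outputs are loaded independently, each when either the online debugger is attached or the dump config asks for that side. A toy rendering of that gating (the flags are hard-coded stand-ins for debugger state and dump_json_parser):

#include <iostream>

int main() {
  bool debugger_enabled = false;  // online debugger attached?
  bool input_need_dump = true;    // dump config asks for kernel inputs
  bool output_need_dump = false;  // dump config asks for kernel outputs

  if (debugger_enabled || input_need_dump) {
    std::cout << "LoadInputs(cnode, launch_info, ..., device_context)\n";
  }
  if (debugger_enabled || output_need_dump) {
    std::cout << "LoadOutputs(cnode, launch_info, ..., device_context)\n";
  }
}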
@@ -19,28 +19,29 @@
 #include <string>
 #include "debug/debugger/debugger.h"
 #include "backend/kernel_compiler/kernel.h"
+#include "runtime/hardware/device_context.h"
 #ifdef ENABLE_D
 #include "toolchain/adx_datadump_callback.h"

 using Adx::DumpChunk;
 #endif
+using mindspore::device::DeviceContext;
 using mindspore::kernel::KernelLaunchInfo;

 namespace mindspore {

 std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size);

-void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                uint32_t root_graph_id);
+void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
+                const DeviceContext *device_context);

 void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                 uint32_t root_graph_id);
+                 uint32_t root_graph_id, const DeviceContext *device_context);

 bool CheckReadData(const CNodePtr &cnode);

-void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order);
-
-void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order);
+void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
+                     const DeviceContext *device_context);

 std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);

@@ -71,7 +71,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
       debugger->SetCurNode(kernel_name);
       bool read_data = CheckReadData(cnode);
       if (read_data) {
-        ReadDataAndDump(cnode, launch_info_, exec_order_);
+        ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);
       }
     }
     exec_order_ += 1;
@@ -87,7 +87,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
       }
       bool read_data = CheckReadData(cnode);
       if (read_data) {
-        ReadDataAndDumpAscend(cnode, exec_order_);
+        ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);
       }
     }
     exec_order_ += 1;
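After these two hunks, the GPU and Ascend branches of DebugActor::Debug make the identical call. A toy model of that call shape (every type here is an invented stand-in; only the parameter threading mirrors the diff):

#include <cstdint>
#include <iostream>
#include <string>

struct DeviceContext { std::string name; };
struct KernelLaunchInfo {};
struct CNode { std::string fullname; };

// Unified entry point: the device context rides along with the launch info.
void ReadDataAndDump(const CNode *cnode, const KernelLaunchInfo *launch_info,
                     uint32_t exec_order, const DeviceContext *device_context) {
  (void)launch_info;  // inputs_/outputs_ would be read here in the real code
  std::cout << cnode->fullname << " via " << device_context->name
            << " at exec " << exec_order << '\n';
}

int main() {
  CNode node{"Default/MatMul-op3"};
  KernelLaunchInfo launch_info;
  DeviceContext gpu{"GPU"}, ascend{"Ascend"};
  uint32_t exec_order = 0;
  // Both device branches now funnel into the same call:
  ReadDataAndDump(&node, &launch_info, exec_order++, &gpu);
  ReadDataAndDump(&node, &launch_info, exec_order++, &ascend);
}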