use launch_info to load inputs and outputs in MindRT

Parastoo Ashtari 2022-01-24 13:25:43 -05:00
parent 19c3370c46
commit 5c8d48d809
6 changed files with 37 additions and 119 deletions

View File

@ -252,6 +252,14 @@ const void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tens
}
#endif
/*
* Feature group: Offline debugger, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Iterates over all the watchpoints in the watchpoint table. If the current tensor is in a watchpoint's
* check_nodes list, that watchpoint is added to watchpoint_to_check (the vector of watchpoints that should be
* checked for the current tensor).
*/
void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck,
const std::shared_ptr<TensorData> &tensor, bool *previous_iter_tensor_needed,
std::string *const qualified_tensor_name,
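
The selection pass this comment describes is a single filter over the watchpoint table. A minimal self-contained sketch of that logic follows; Watchpoint and WatchpointsToCheck are illustrative stand-ins, not MindSpore's actual types:

#include <algorithm>
#include <map>
#include <string>
#include <vector>

// Illustrative stand-in for a watchpoint table entry.
struct Watchpoint {
  int id;
  std::vector<std::string> check_nodes;
};

// Collect the watchpoints whose check_nodes list mentions the current tensor.
std::vector<Watchpoint> WatchpointsToCheck(const std::map<int, Watchpoint> &table,
                                           const std::string &tensor_name) {
  std::vector<Watchpoint> to_check;
  for (const auto &entry : table) {
    const auto &nodes = entry.second.check_nodes;
    if (std::find(nodes.begin(), nodes.end(), tensor_name) != nodes.end()) {
      to_check.push_back(entry.second);
    }
  }
  return to_check;
}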

View File

@ -453,25 +453,6 @@ void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Returns true if e2e dump is enabled for the current iteration.
*/
bool Debugger::DumpDataEnabledIteration() const {
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (!dump_json_parser.e2e_dump_enabled()) {
return false;
}
auto cur_iter = dump_json_parser.cur_dump_iter();
if (dump_json_parser.IsDumpIter(cur_iter)) {
return true;
}
return false;
}
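
This helper is being removed; the later hunks in this commit query DumpJsonParser directly instead. The call-site migration, sketched with both calls taken from the hunks below:

// Before (removed): route the per-iteration dump check through Debugger.
//   bool dump_enabled = debugger->DumpDataEnabledIteration();
// After: ask the dump config parser directly.
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = dump_json_parser.DumpEnabledForIter();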
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
@ -1563,7 +1544,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
}
/*
* Feature group: Dump.
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load all the parameters and value nodes for the last loaded graph.
@ -1588,7 +1569,7 @@ void Debugger::LoadParametersAndConst() {
}
/*
* Feature group: Dump.
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load all the parameters and value nodes for the given graph.
@ -1658,47 +1639,6 @@ void Debugger::LoadGraphOutputs() {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: MindRT.
* Description: Load a single node's outputs for kernel-by-kernel Ascend MindRT dump.
*/
void Debugger::LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id) {
if (device_target_ != kAscendDevice) {
return;
}
MS_EXCEPTION_IF_NULL(node);
std::string kernel_name = GetKernelNodeName(node);
auto output_size = AnfAlgo::GetOutputTensorNum(node);
if (partial_memory_) {
if (!debug_services_->IsWatchPoint(kernel_name, node)) {
return;
}
}
for (size_t j = 0; j < output_size; ++j) {
if (!AnfAlgo::OutputAddrExist(node, j)) {
MS_LOG(INFO) << "Cannot find output addr for slot " << j << " for " << kernel_name;
continue;
}
auto addr = AnfAlgo::GetOutputAddr(node, j);
MS_EXCEPTION_IF_NULL(addr);
auto type = AnfAlgo::GetOutputInferDataType(node, j);
if (!IsTypeDebuggerSupported(type)) {
return;
}
auto format = kOpFormat_DEFAULT;
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(node, j);
auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
}
}
}
/*
* Feature group: Online debugger.
* Target device group: GPU.
@ -1723,7 +1663,8 @@ void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
* Description: Update step number when DebugActor::DebugOnStepEnd is called at the end of each step.
*/
void Debugger::UpdateStepNumGPU() {
if (device_target_ == kGPUDevice && (debugger_enabled_ || DumpDataEnabledIteration())) {
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (device_target_ == kGPUDevice && (debugger_enabled_ || dump_json_parser.DumpEnabledForIter())) {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
++num_step_;

View File

@ -152,8 +152,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
void LoadGraphOutputs();
void LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id);
void CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);
void LoadGraphs(const KernelGraphPtr &graph_ptr);

View File

@ -69,8 +69,8 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &
* Runtime category: MindRT.
* Description: Get kernel inputs from launch_info and load the inputs from device to host.
*/
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
uint32_t root_graph_id) {
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
const DeviceContext *device_context) {
// get inputs
auto kernel_inputs = launch_info->inputs_;
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
@ -83,17 +83,17 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
if (type == kMetaTypeNone) {
continue;
}
#ifdef ENABLE_GPU
auto format = kOpFormat_DEFAULT;
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
string input_tensor_name = input_kernel_name + ':' + "0";
ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, root_graph_id);
auto ret =
device_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
}
#endif
}
}
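
The #ifdef ENABLE_GPU block above is the core of this commit: instead of constructing a GPU-only GPUDeviceAddress that is compiled out on other backends, the loader now asks the active DeviceContext for an address. A sketch of the pattern, with names taken from this hunk (slot and keep_prev are stand-ins for the literal 0 and true arguments):

// Before: GPU-only, unavailable for Ascend MindRT kernel-by-kernel runs.
//   auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
// After: backend-neutral; the DeviceContext creates the right address type.
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
auto ret = device_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, slot, keep_prev, root_graph_id);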
@ -104,7 +104,7 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
* Description: Get kernel outputs from launch_info and load the outputs from device to host.
*/
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
uint32_t root_graph_id) {
uint32_t root_graph_id, const DeviceContext *device_context) {
// get outputs
auto kernel_outputs = launch_info->outputs_;
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
@ -119,17 +119,16 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uin
if (type == kMetaTypeNone) {
continue;
}
#ifdef ENABLE_GPU
auto format = kOpFormat_DEFAULT;
auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
auto ret = device_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
}
#endif
}
}
@ -167,21 +166,23 @@ bool CheckReadData(const CNodePtr &cnode) {
* Description: Load the inputs and outputs of the given node if needed, dump them if dump is enabled, and then call
* PostExecuteNode on the given node.
*/
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order) {
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
const DeviceContext *device_context) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
return;
}
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = debugger->DumpDataEnabledIteration();
bool dump_enabled = dump_json_parser.DumpEnabledForIter();
MS_LOG(DEBUG) << "dump_enabled: " << dump_enabled;
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
auto root_graph_id = kernel_graph->root_graph_id();
if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
LoadInputs(cnode, launch_info, exec_order, root_graph_id);
LoadInputs(cnode, launch_info, exec_order, root_graph_id, device_context);
}
if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
LoadOutputs(cnode, launch_info, exec_order, root_graph_id);
LoadOutputs(cnode, launch_info, exec_order, root_graph_id, device_context);
}
// Dump kernel
if (dump_enabled) {
@ -198,37 +199,6 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
debugger->PostExecuteNode(cnode, last_kernel);
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: MindRT.
* Description: Load outputs of the given node and dump them if dump is enabled for Ascend kernel-by-kernel dump.
*/
void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
return;
}
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool dump_enabled = dump_json_parser.DumpEnabledForIter();
MS_LOG(DEBUG) << "dump_enabled: " << dump_enabled;
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
auto root_graph_id = kernel_graph->root_graph_id();
debugger->LoadNodeOutputs(cnode, exec_order, root_graph_id);
// Dump kernel
if (dump_enabled) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto graph_id = kernel_graph->graph_id();
debugger->DumpSingleNode(cnode, graph_id);
// Clear Dumped data when online debugger is not enabled
if (!debugger->debugger_enabled()) {
debugger->ClearCurrentData();
}
}
}
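
With DeviceContext now threaded through ReadDataAndDump, this Ascend-only wrapper becomes redundant; the DebugActor hunk at the end of this commit replaces its call with the shared entry point:

// Old Ascend-only call site in DebugActor::Debug (removed below):
//   ReadDataAndDumpAscend(cnode, exec_order_);
// Unified call site, identical to the GPU path:
ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);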
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.

View File

@ -19,28 +19,29 @@
#include <string>
#include "debug/debugger/debugger.h"
#include "backend/kernel_compiler/kernel.h"
#include "runtime/hardware/device_context.h"
#ifdef ENABLE_D
#include "toolchain/adx_datadump_callback.h"
using Adx::DumpChunk;
#endif
using mindspore::device::DeviceContext;
using mindspore::kernel::KernelLaunchInfo;
namespace mindspore {
std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size);
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
uint32_t root_graph_id);
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
const DeviceContext *device_context);
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
uint32_t root_graph_id);
uint32_t root_graph_id, const DeviceContext *device_context);
bool CheckReadData(const CNodePtr &cnode);
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order);
void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order);
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
const DeviceContext *device_context);
std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);

View File

@ -71,7 +71,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
debugger->SetCurNode(kernel_name);
bool read_data = CheckReadData(cnode);
if (read_data) {
ReadDataAndDump(cnode, launch_info_, exec_order_);
ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);
}
}
exec_order_ += 1;
@ -87,7 +87,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
}
bool read_data = CheckReadData(cnode);
if (read_data) {
ReadDataAndDumpAscend(cnode, exec_order_);
ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);
}
}
exec_order_ += 1;