use launch_info to load inputs and outputs in mindRT
parent 19c3370c46
commit 5c8d48d809
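In brief: the MindRT debug path previously built device::gpu::GPUDeviceAddress objects directly, guarded by #ifdef ENABLE_GPU, and kept a separate ReadDataAndDumpAscend path. This commit threads the DeviceContext from DebugActor into LoadInputs/LoadOutputs/ReadDataAndDump so device addresses come from device_context->CreateDeviceAddress, letting GPU and Ascend share one kernel-by-kernel load path and retiring ReadDataAndDumpAscend, Debugger::LoadNodeOutputs, and Debugger::DumpDataEnabledIteration. A minimal standalone sketch of the pattern (all classes below are illustrative mock-ups, not MindSpore's):

// Sketch: an abstract DeviceContext factory lets one debugger load path
// serve both GPU and Ascend, with no GPU-specific address type or #ifdef.
#include <iostream>
#include <memory>
#include <string>

struct DeviceAddress {
  virtual ~DeviceAddress() = default;
  // Copies one tensor from device memory to host; returns false on failure.
  virtual bool LoadMemToHost(const std::string &tensor_name) const = 0;
};

struct GpuAddress : DeviceAddress {
  bool LoadMemToHost(const std::string &n) const override {
    std::cout << "GPU load " << n << '\n';
    return true;
  }
};

struct AscendAddress : DeviceAddress {
  bool LoadMemToHost(const std::string &n) const override {
    std::cout << "Ascend load " << n << '\n';
    return true;
  }
};

// The abstraction the commit threads through LoadInputs/LoadOutputs.
struct DeviceContext {
  virtual ~DeviceContext() = default;
  virtual std::unique_ptr<DeviceAddress> CreateDeviceAddress() const = 0;
};

struct GpuContext : DeviceContext {
  std::unique_ptr<DeviceAddress> CreateDeviceAddress() const override {
    return std::make_unique<GpuAddress>();
  }
};

struct AscendContext : DeviceContext {
  std::unique_ptr<DeviceAddress> CreateDeviceAddress() const override {
    return std::make_unique<AscendAddress>();
  }
};

// One load path for every backend: no #ifdef ENABLE_GPU, no GPUDeviceAddress.
void ReadDataAndDump(const DeviceContext *ctx, const std::string &tensor_name) {
  auto addr = ctx->CreateDeviceAddress();
  if (!addr->LoadMemToHost(tensor_name)) {
    std::cerr << "LoadMemToHost failed for " << tensor_name << '\n';
  }
}

int main() {
  GpuContext gpu;
  AscendContext ascend;
  ReadDataAndDump(&gpu, "Conv2D-op1:0");     // GPU path
  ReadDataAndDump(&ascend, "Conv2D-op1:0");  // Ascend path
}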
@@ -252,6 +252,14 @@ const void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor
 }
 #endif

+/*
+ * Feature group: Offline debugger, Online debugger.
+ * Target device group: Ascend, GPU.
+ * Runtime category: Old runtime, MindRT.
+ * Description: Goes through all the watchpoints in the watchpoint table. If the current tensor is in the list of
+ * check_nodes, that watchpoint is added to the vector of watchpoint_to_check (vector of watchpoints that should be
+ * checked for the current tensor).
+ */
 void DebugServices::AddWatchPointsToCheck(bool init_dbg_suspend, bool step_end, bool recheck,
                                           const std::shared_ptr<TensorData> &tensor, bool *previous_iter_tensor_needed,
                                           std::string *const qualified_tensor_name,
@@ -453,25 +453,6 @@ void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
   }
 }

-/*
- * Feature group: Dump.
- * Target device group: Ascend, GPU.
- * Runtime category: Old runtime, MindRT.
- * Description: Returns true for e2e dump if dump is enabled for the current iteration.
- */
-bool Debugger::DumpDataEnabledIteration() const {
-  auto &dump_json_parser = DumpJsonParser::GetInstance();
-  if (!dump_json_parser.e2e_dump_enabled()) {
-    return false;
-  }
-
-  auto cur_iter = dump_json_parser.cur_dump_iter();
-  if (dump_json_parser.IsDumpIter(cur_iter)) {
-    return true;
-  }
-  return false;
-}
-
 /*
  * Feature group: Dump.
  * Target device group: Ascend, GPU.
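The deleted helper above gated e2e dump on the current iteration; later hunks replace its call sites with DumpJsonParser::DumpEnabledForIter(). A condensed sketch of the equivalent check (the parser is mocked and the real DumpEnabledForIter body is assumed, since this diff does not show it):

#include <cstdint>
#include <iostream>
#include <set>

// Mock of DumpJsonParser: just enough state to express the iteration gate.
class MockDumpParser {
 public:
  bool e2e_dump_enabled() const { return e2e_enabled_; }
  uint32_t cur_dump_iter() const { return cur_iter_; }
  bool IsDumpIter(uint32_t iter) const { return iters_.count(iter) > 0; }
  // Assumed one-call equivalent of the deleted Debugger::DumpDataEnabledIteration().
  bool DumpEnabledForIter() const { return e2e_dump_enabled() && IsDumpIter(cur_dump_iter()); }

 private:
  bool e2e_enabled_ = true;
  uint32_t cur_iter_ = 2;
  std::set<uint32_t> iters_ = {0, 2, 10};  // iterations configured for dump
};

int main() {
  MockDumpParser parser;
  std::cout << std::boolalpha << parser.DumpEnabledForIter() << '\n';  // true
}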
@@ -1563,7 +1544,7 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
 }

 /*
- * Feature group: Dump.
+ * Feature group: Dump, Online debugger.
  * Target device group: Ascend, GPU.
  * Runtime category: Old runtime, MindRT.
  * Description: Load all the parameters and value nodes for the last loaded graph.
@@ -1588,7 +1569,7 @@ void Debugger::LoadParametersAndConst() {
 }

 /*
- * Feature group: Dump.
+ * Feature group: Dump, Online debugger.
  * Target device group: Ascend, GPU.
  * Runtime category: Old runtime, MindRT.
  * Description: Load all the parameters and value nodes for the given graph.
@@ -1658,47 +1639,6 @@ void Debugger::LoadGraphOutputs() {
   }
 }

-/*
- * Feature group: Dump.
- * Target device group: Ascend.
- * Runtime category: MindRT.
- * Description: Load a single node for kernel-by-kernel ascend mindRT dump.
- */
-void Debugger::LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id) {
-  if (device_target_ != kAscendDevice) {
-    return;
-  }
-
-  MS_EXCEPTION_IF_NULL(node);
-  std::string kernel_name = GetKernelNodeName(node);
-  auto output_size = AnfAlgo::GetOutputTensorNum(node);
-  if (partial_memory_) {
-    if (!debug_services_->IsWatchPoint(kernel_name, node)) {
-      return;
-    }
-  }
-  for (size_t j = 0; j < output_size; ++j) {
-    if (!AnfAlgo::OutputAddrExist(node, j)) {
-      MS_LOG(INFO) << "Cannot find output addr for slot " << j << " for " << kernel_name;
-      continue;
-    }
-    auto addr = AnfAlgo::GetOutputAddr(node, j);
-    MS_EXCEPTION_IF_NULL(addr);
-    auto type = AnfAlgo::GetOutputInferDataType(node, j);
-    if (!IsTypeDebuggerSupported(type)) {
-      return;
-    }
-    auto format = kOpFormat_DEFAULT;
-    string tensor_name = kernel_name + ':' + std::to_string(j);
-    ShapeVector int_shapes = trans::GetRuntimePaddingShape(node, j);
-    auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
-    if (!ret) {
-      MS_LOG(ERROR) << "LoadMemToHost:"
-                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
-    }
-  }
-}
-
 /*
  * Feature group: Online debugger.
  * Target device group: GPU.
@@ -1723,7 +1663,8 @@ void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
  * Description: Update step number when DebugActor::DebugOnStepEnd is called at the end of each step.
  */
 void Debugger::UpdateStepNumGPU() {
-  if (device_target_ == kGPUDevice && (debugger_enabled_ || DumpDataEnabledIteration())) {
+  auto &dump_json_parser = DumpJsonParser::GetInstance();
+  if (device_target_ == kGPUDevice && (debugger_enabled_ || dump_json_parser.DumpEnabledForIter())) {
     // access lock for public method
     std::lock_guard<std::mutex> a_lock(access_lock_);
     ++num_step_;
@@ -152,8 +152,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

   void LoadGraphOutputs();

-  void LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id);
-
   void CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);

   void LoadGraphs(const KernelGraphPtr &graph_ptr);
@@ -69,8 +69,8 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &
  * Runtime category: MindRT.
  * Description: Get kernel inputs from launch_info and load the inputs from device to host.
  */
-void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                uint32_t root_graph_id) {
+void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
+                const DeviceContext *device_context) {
   // get inputs
   auto kernel_inputs = launch_info->inputs_;
   auto input_size = AnfAlgo::GetInputTensorNum(cnode);
@@ -83,17 +83,17 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
     if (type == kMetaTypeNone) {
       continue;
     }
-#ifdef ENABLE_GPU
+
     auto format = kOpFormat_DEFAULT;
-    auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
+    auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
     string input_tensor_name = input_kernel_name + ':' + "0";
     ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
-    auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, root_graph_id);
+    auto ret =
+      device_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true, root_graph_id);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
                     << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
     }
-#endif
   }
 }

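The loop body above names each slot "<kernel name>:<slot>" and loads it through an address created by the device context. A minimal sketch of that per-slot pattern (MockAddress and MockDeviceContext are invented stand-ins; only the naming convention and call shape mirror the diff):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

struct MockAddress {
  bool LoadMemToHost(const std::string &tensor_name, uint32_t exec_order) const {
    std::cout << "load " << tensor_name << " (exec " << exec_order << ")\n";
    return true;
  }
};

struct MockDeviceContext {
  MockAddress CreateDeviceAddress() const { return MockAddress{}; }
};

int main() {
  MockDeviceContext device_context;  // handed down by DebugActor in the real code
  const std::string kernel_name = "Default/Conv2D-op1";
  const std::size_t output_size = 2;
  for (std::size_t j = 0; j < output_size; ++j) {
    // Slot j of a kernel is addressed as "<kernel name>:<slot>".
    std::string tensor_name = kernel_name + ':' + std::to_string(j);
    auto addr = device_context.CreateDeviceAddress();
    if (!addr.LoadMemToHost(tensor_name, /*exec_order=*/0)) {
      std::cerr << "LoadMemToHost failed: " << tensor_name << '\n';
    }
  }
}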
@@ -104,7 +104,7 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
  * Description: Get kernel outputs from launch_info and load the outputs from device to host.
  */
 void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                 uint32_t root_graph_id) {
+                 uint32_t root_graph_id, const DeviceContext *device_context) {
   // get outputs
   auto kernel_outputs = launch_info->outputs_;
   auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
@@ -119,17 +119,16 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uin
     if (type == kMetaTypeNone) {
       continue;
     }
-#ifdef ENABLE_GPU
+
     auto format = kOpFormat_DEFAULT;
-    auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
+    auto device_addr = device_context->CreateDeviceAddress(addr->addr, addr->size, format, type);
     string tensor_name = kernel_name + ':' + std::to_string(j);
     ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
-    auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
+    auto ret = device_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false, root_graph_id);
     if (!ret) {
       MS_LOG(ERROR) << "LoadMemToHost:"
                     << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
     }
-#endif
   }
 }

@@ -167,21 +166,23 @@ bool CheckReadData(const CNodePtr &cnode) {
  * Description: Load inputs and outputs of the given node if needed and dump them if dump is enabled, then it performs
  * PostExecuteNode function on the given node.
  */
-void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order) {
+void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
+                     const DeviceContext *device_context) {
   auto debugger = Debugger::GetInstance();
   if (!debugger) {
     return;
   }
   auto &dump_json_parser = DumpJsonParser::GetInstance();
-  bool dump_enabled = debugger->DumpDataEnabledIteration();
+  bool dump_enabled = dump_json_parser.DumpEnabledForIter();
   MS_LOG(DEBUG) << "dump_enabled: " << dump_enabled;
   auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   auto root_graph_id = kernel_graph->root_graph_id();
   if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
-    LoadInputs(cnode, launch_info, exec_order, root_graph_id);
+    LoadInputs(cnode, launch_info, exec_order, root_graph_id, device_context);
   }
   if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
-    LoadOutputs(cnode, launch_info, exec_order, root_graph_id);
+    LoadOutputs(cnode, launch_info, exec_order, root_graph_id, device_context);
   }
   // Dump kernel
   if (dump_enabled) {
@@ -198,37 +199,6 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
   debugger->PostExecuteNode(cnode, last_kernel);
 }

-/*
- * Feature group: Dump.
- * Target device group: Ascend.
- * Runtime category: MindRT.
- * Description: Load outputs of the given node and dump them if dump is enabled for Ascend kernel-by-kernel dump.
- */
-void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order) {
-  auto debugger = Debugger::GetInstance();
-  if (!debugger) {
-    return;
-  }
-  auto &dump_json_parser = DumpJsonParser::GetInstance();
-  bool dump_enabled = dump_json_parser.DumpEnabledForIter();
-  MS_LOG(DEBUG) << "dump_enabled: " << dump_enabled;
-  auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  auto root_graph_id = kernel_graph->root_graph_id();
-
-  debugger->LoadNodeOutputs(cnode, exec_order, root_graph_id);
-  // Dump kernel
-  if (dump_enabled) {
-    MS_EXCEPTION_IF_NULL(kernel_graph);
-    auto graph_id = kernel_graph->graph_id();
-    debugger->DumpSingleNode(cnode, graph_id);
-    // Clear Dumped data when online debugger is not enabled
-    if (!debugger->debugger_enabled()) {
-      debugger->ClearCurrentData();
-    }
-  }
-}
-
 /*
  * Feature group: Dump, Online Debugger.
  * Target device group: Ascend, GPU.
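With ReadDataAndDumpAscend deleted, both backends rely on the input/output gating inside the unified ReadDataAndDump above: inputs and outputs are loaded independently, each when either the online debugger is attached or the dump config asks for that side. A toy rendering of that gating (the flags are hard-coded stand-ins for debugger state and dump_json_parser):

#include <iostream>

int main() {
  bool debugger_enabled = false;  // online debugger attached?
  bool input_need_dump = true;    // dump config asks for kernel inputs
  bool output_need_dump = false;  // dump config asks for kernel outputs

  if (debugger_enabled || input_need_dump) {
    std::cout << "LoadInputs(cnode, launch_info, ..., device_context)\n";
  }
  if (debugger_enabled || output_need_dump) {
    std::cout << "LoadOutputs(cnode, launch_info, ..., device_context)\n";
  }
}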
@@ -19,28 +19,29 @@
 #include <string>
 #include "debug/debugger/debugger.h"
 #include "backend/kernel_compiler/kernel.h"
+#include "runtime/hardware/device_context.h"
 #ifdef ENABLE_D
 #include "toolchain/adx_datadump_callback.h"

 using Adx::DumpChunk;
 #endif
+using mindspore::device::DeviceContext;
 using mindspore::kernel::KernelLaunchInfo;

 namespace mindspore {

 std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size);

-void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                uint32_t root_graph_id);
+void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order, uint32_t root_graph_id,
+                const DeviceContext *device_context);

 void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
-                 uint32_t root_graph_id);
+                 uint32_t root_graph_id, const DeviceContext *device_context);

 bool CheckReadData(const CNodePtr &cnode);

-void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order);
-
-void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order);
+void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
+                     const DeviceContext *device_context);

 std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr);

@@ -71,7 +71,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
       debugger->SetCurNode(kernel_name);
       bool read_data = CheckReadData(cnode);
       if (read_data) {
-        ReadDataAndDump(cnode, launch_info_, exec_order_);
+        ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);
       }
     }
     exec_order_ += 1;
@@ -87,7 +87,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
       }
       bool read_data = CheckReadData(cnode);
       if (read_data) {
-        ReadDataAndDumpAscend(cnode, exec_order_);
+        ReadDataAndDump(cnode, launch_info_, exec_order_, device_context);
       }
     }
     exec_order_ += 1;
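After these two hunks, the GPU and Ascend branches of DebugActor::Debug make the identical call. A toy model of that call shape (every type here is an invented stand-in; only the parameter threading mirrors the diff):

#include <cstdint>
#include <iostream>
#include <string>

struct DeviceContext { std::string name; };
struct KernelLaunchInfo {};
struct CNode { std::string fullname; };

// Unified entry point: the device context rides along with the launch info.
void ReadDataAndDump(const CNode *cnode, const KernelLaunchInfo *launch_info,
                     uint32_t exec_order, const DeviceContext *device_context) {
  (void)launch_info;  // inputs_/outputs_ would be read here in the real code
  std::cout << cnode->fullname << " via " << device_context->name
            << " at exec " << exec_order << '\n';
}

int main() {
  CNode node{"Default/MatMul-op3"};
  KernelLaunchInfo launch_info;
  DeviceContext gpu{"GPU"}, ascend{"Ascend"};
  uint32_t exec_order = 0;
  // Both device branches now funnel into the same call:
  ReadDataAndDump(&node, &launch_info, exec_order++, &gpu);
  ReadDataAndDump(&node, &launch_info, exec_order++, &ascend);
}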