forked from OSSInnovation/mindspore
!4428 Operation Overflow Watchpoint for D-Chip debugger
Merge pull request !4428 from AdelShafiei/opoverflow2
This commit is contained in:
commit
8f6ed032e5
|
@ -209,4 +209,24 @@ void DataDumpParser::CheckOpDebugMode(uint32_t op_debug_mode) const {
|
|||
MS_LOG(EXCEPTION) << "[DataDump] op_debug_mode in config json file should be [0-3]";
|
||||
}
|
||||
}
|
||||
|
||||
std::string DataDumpParser::GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const {
|
||||
std::string bin_path = "/var/log/npu/ide_daemon/dump";
|
||||
|
||||
const char *dump_data_path = std::getenv("DATA_DUMP_PATH");
|
||||
bin_path.append(dump_data_path);
|
||||
bin_path.append("_");
|
||||
bin_path.append(std::to_string(device_id));
|
||||
bin_path.append("/");
|
||||
bin_path.append(net_name_);
|
||||
bin_path.append("_");
|
||||
bin_path.append(std::to_string(graph_id));
|
||||
bin_path.append("/");
|
||||
bin_path.append(std::to_string(dump_mode_));
|
||||
bin_path.append("/");
|
||||
bin_path.append(std::to_string(dump_step_));
|
||||
bin_path.append("/");
|
||||
|
||||
return bin_path;
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -42,6 +42,7 @@ class DataDumpParser {
|
|||
uint32_t dump_step() const { return dump_step_; }
|
||||
void MatchKernel(const std::string &kernel_name);
|
||||
void PrintUnusedKernel();
|
||||
std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const;
|
||||
|
||||
private:
|
||||
DataDumpParser() = default;
|
||||
|
|
|
@ -50,6 +50,8 @@ void DebugServices::AddWatchpoint(unsigned int id, unsigned int watch_condition,
|
|||
} else if (watch_condition == 1) {
|
||||
watchpoint_item.conditions.inf.enabled = true;
|
||||
watchpoint_item.conditions.neg_inf.enabled = true;
|
||||
} else if (watch_condition == 2) {
|
||||
watchpoint_item.conditions.overflow.enabled = true;
|
||||
}
|
||||
|
||||
watchpoint_item.check_node_list = check_node_list;
|
||||
|
@ -63,8 +65,8 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
|
|||
}
|
||||
|
||||
void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot,
|
||||
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
|
||||
std::vector<int> *condition, std::vector<unsigned int> *wacthpoint_id) {
|
||||
std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
|
||||
const std::vector<std::string> &op_overflows) {
|
||||
std::lock_guard<std::mutex> lg(lock_);
|
||||
|
||||
std::vector<std::shared_ptr<TensorData>> tensor_list = tensor_loader_->GetTensor();
|
||||
|
@ -74,6 +76,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
|
||||
for (std::size_t i = 0; i < tensor_list.size(); i++) {
|
||||
current_tensor_name = tensor_list[i]->GetName();
|
||||
std::string tensor_slot = std::to_string(tensor_list[i]->GetSlot());
|
||||
mindspore::tensor::TensorPtr tensor_ptr = tensor_list[i]->GetTensor();
|
||||
int tensor_data_type = tensor_ptr->data_type_c();
|
||||
|
||||
|
@ -106,10 +109,23 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
}
|
||||
}
|
||||
}
|
||||
std::vector<unsigned int> hit_encountered;
|
||||
|
||||
// check if no watchpoints are valid for the current tensor
|
||||
if (watchpoints_to_check_table.empty()) {
|
||||
continue;
|
||||
// handle watchpoint conditions that do not require per element checks
|
||||
for (auto it_w_table_check = watchpoints_to_check_table.begin();
|
||||
it_w_table_check != watchpoints_to_check_table.end(); ++it_w_table_check) {
|
||||
if (it_w_table_check->second.conditions.overflow.enabled) {
|
||||
std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
|
||||
if (std::find(op_overflows.begin(), op_overflows.end(), name_no_slot) != op_overflows.end()) {
|
||||
hit_encountered.push_back(it_w_table_check->second.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hit_encountered.size()) {
|
||||
HandleWatchpointHits(hit_encountered, name, slot, condition, watchpoint_id, current_tensor_name,
|
||||
&watchpoints_to_check_table, tensor_slot);
|
||||
hit_encountered.clear();
|
||||
}
|
||||
|
||||
// need to add support for float16 and float64, and other types when we support conditions beyond inf and nan
|
||||
|
@ -117,11 +133,14 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
continue;
|
||||
}
|
||||
|
||||
// check if no watchpoints are remaining
|
||||
if (watchpoints_to_check_table.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c());
|
||||
unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float);
|
||||
|
||||
std::unordered_map<unsigned int, watchpoint_t>::iterator it_w_table_check;
|
||||
std::vector<unsigned int> hit_encountered;
|
||||
|
||||
for (unsigned int index = 0; index < num_elements; index++) {
|
||||
float x = start_addr[index];
|
||||
|
@ -134,33 +153,12 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
} else if (it_w_table_check->second.conditions.nan.enabled && isnan(x)) {
|
||||
hit_encountered.push_back(it_w_table_check->second.id);
|
||||
}
|
||||
|
||||
++it_w_table_check;
|
||||
}
|
||||
|
||||
if (hit_encountered.size()) {
|
||||
for (auto it_hit_id = hit_encountered.begin(); it_hit_id != hit_encountered.end(); ++it_hit_id) {
|
||||
std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
|
||||
name->push_back(name_no_slot);
|
||||
|
||||
slot->push_back(std::to_string(tensor_list[i]->GetSlot()));
|
||||
data_ptr->push_back(reinterpret_cast<char *>(tensor_ptr->data_c()));
|
||||
data_size->push_back(tensor_ptr->data().nbytes());
|
||||
|
||||
int condition_item = -1;
|
||||
if (watchpoint_table[*it_hit_id].conditions.nan.enabled) {
|
||||
condition_item = 0;
|
||||
} else if (watchpoint_table[*it_hit_id].conditions.inf.enabled ||
|
||||
watchpoint_table[*it_hit_id].conditions.neg_inf.enabled) {
|
||||
condition_item = 1;
|
||||
}
|
||||
condition->push_back(condition_item);
|
||||
|
||||
wacthpoint_id->push_back(*it_hit_id);
|
||||
|
||||
watchpoints_to_check_table.erase(*it_hit_id);
|
||||
}
|
||||
|
||||
HandleWatchpointHits(hit_encountered, name, slot, condition, watchpoint_id, current_tensor_name,
|
||||
&watchpoints_to_check_table, tensor_slot);
|
||||
hit_encountered.clear();
|
||||
}
|
||||
|
||||
|
@ -171,6 +169,34 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
}
|
||||
}
|
||||
|
||||
void DebugServices::HandleWatchpointHits(const std::vector<unsigned int> &hit_encountered,
|
||||
std::vector<std::string> *name, std::vector<std::string> *slot,
|
||||
std::vector<int> *condition, std::vector<unsigned int> *watchpoint_id,
|
||||
std::string current_tensor_name,
|
||||
std::unordered_map<unsigned int, watchpoint_t> *watchpoints_to_check_table,
|
||||
std::string tensor_slot) {
|
||||
for (auto it_hit_id = hit_encountered.begin(); it_hit_id != hit_encountered.end(); ++it_hit_id) {
|
||||
if (watchpoint_table.find(*it_hit_id) != watchpoint_table.end()) {
|
||||
std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
|
||||
name->push_back(name_no_slot);
|
||||
slot->push_back(tensor_slot);
|
||||
|
||||
int condition_item = -1;
|
||||
if (watchpoint_table[*it_hit_id].conditions.nan.enabled) {
|
||||
condition_item = 0;
|
||||
} else if (watchpoint_table[*it_hit_id].conditions.inf.enabled ||
|
||||
watchpoint_table[*it_hit_id].conditions.neg_inf.enabled) {
|
||||
condition_item = 1;
|
||||
} else if (watchpoint_table[*it_hit_id].conditions.overflow.enabled) {
|
||||
condition_item = 2;
|
||||
}
|
||||
condition->push_back(condition_item);
|
||||
watchpoint_id->push_back(*it_hit_id);
|
||||
}
|
||||
watchpoints_to_check_table->erase(*it_hit_id);
|
||||
}
|
||||
}
|
||||
|
||||
void DebugServices::CheckSingleWatchpoint(std::shared_ptr<TensorData> watchtensor, std::string *name, std::string *slot,
|
||||
char **data_ptr, unsigned int *data_size, int *condition,
|
||||
unsigned int *wacthpoint_id) {
|
||||
|
|
|
@ -51,6 +51,7 @@ class DebugServices {
|
|||
condition_no_param_t inf;
|
||||
condition_no_param_t neg_inf;
|
||||
condition_no_param_t nan;
|
||||
condition_no_param_t overflow;
|
||||
condition_with_param_t max_below;
|
||||
condition_with_param_t max_above;
|
||||
condition_with_param_t min_below;
|
||||
|
@ -74,9 +75,8 @@ class DebugServices {
|
|||
|
||||
void RemoveWatchpoint(unsigned int id);
|
||||
|
||||
void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<char *> *data_ptr,
|
||||
std::vector<unsigned int> *data_size, std::vector<int> *condition,
|
||||
std::vector<unsigned int> *wacthpoint_id);
|
||||
void CheckWatchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<int> *condition,
|
||||
std::vector<unsigned int> *watchpoint_id, const std::vector<std::string> &op_overflows);
|
||||
|
||||
void CheckSingleWatchpoint(std::shared_ptr<TensorData> watchnode, std::string *name, std::string *slot,
|
||||
char **data_ptr, unsigned int *data_size, int *condition, unsigned int *wacthpoint_id);
|
||||
|
@ -97,6 +97,12 @@ class DebugServices {
|
|||
std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
|
||||
|
||||
TensorLoader *tensor_loader_;
|
||||
|
||||
void HandleWatchpointHits(const std::vector<unsigned int> &hit_encountered, std::vector<std::string> *name,
|
||||
std::vector<std::string> *slot, std::vector<int> *condition,
|
||||
std::vector<unsigned int> *watchpoint_id, std::string current_tensor_name,
|
||||
std::unordered_map<unsigned int, watchpoint_t> *watchpoints_to_check_table,
|
||||
std::string tensor_slot);
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -79,8 +79,16 @@ message WatchCondition {
|
|||
enum Condition {
|
||||
nan = 0;
|
||||
inf = 1;
|
||||
overflow = 2;
|
||||
ge = 3; // greater than and equal to
|
||||
gt = 4; // greater than
|
||||
le = 5; // less than and equal to
|
||||
lt = 6; // less than
|
||||
between = 7; // between
|
||||
}
|
||||
Condition condition = 1;
|
||||
repeated float value = 2; // for between condition, there will be two values
|
||||
repeated bool include = 3; // for between condition, define the value is included or not
|
||||
}
|
||||
|
||||
message WatchNode {
|
||||
|
|
|
@ -14,11 +14,18 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <dirent.h>
#include <stdio.h>
#include <algorithm>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <tuple>
#include <utility>
#include <vector>
|
||||
#include "debug/debugger/debugger.h"
|
||||
#include "debug/data_dump_parser.h"
|
||||
#include "pipeline/jit/pipeline.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
|
@ -49,7 +56,9 @@ Debugger::Debugger()
|
|||
node_name_(""),
|
||||
cur_name_(""),
|
||||
is_dataset_graph_(false),
|
||||
partial_memory_(false) {}
|
||||
partial_memory_(false),
|
||||
last_overflow_bin_(0),
|
||||
overflow_bin_path_("") {}
|
||||
|
||||
void Debugger::Init(const uint32_t device_id, const std::string device_target) {
|
||||
// access lock for public method
|
||||
|
@ -133,6 +142,35 @@ void Debugger::EnableDebugger() {
|
|||
"usage for large models.";
|
||||
}
|
||||
|
||||
if (device_target_ == kAscendDevice) {
|
||||
// set operation overflow info
|
||||
overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
|
||||
// new overflow dump files will have a timestamp greater than last_overflow_bin_
|
||||
last_overflow_bin_ = 0;
|
||||
DIR *d;
|
||||
d = opendir(overflow_bin_path_.c_str());
|
||||
if (d) {
|
||||
struct dirent *dir;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_path = overflow_bin_path_;
|
||||
file_path.append(dir->d_name);
|
||||
std::size_t found = file_path.find_last_of(".");
|
||||
if (found == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
std::string overflow_time = file_path.substr(found + 1);
|
||||
if (stod(overflow_time) <= last_overflow_bin_) {
|
||||
MS_LOG(INFO) << "Old op overflow bin folder" << file_path;
|
||||
continue;
|
||||
}
|
||||
last_overflow_bin_ = stod(overflow_time);
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "last op overflow bin folder" << last_overflow_bin_;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize grpc client
|
||||
if (debugger_enabled_) {
|
||||
grpc_client_ = std::make_unique<GrpcClient>(host, port);
|
||||
|
@ -154,6 +192,9 @@ void Debugger::Reset() {
|
|||
graph_ptr_ = nullptr;
|
||||
grpc_client_ = nullptr;
|
||||
debug_services_ = nullptr;
|
||||
last_overflow_bin_ = 0;
|
||||
overflow_bin_path_ = "";
|
||||
stream_task_to_opname_.clear();
|
||||
}
|
||||
|
||||
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
|
||||
|
@ -200,6 +241,7 @@ void Debugger::PostExecuteNode() {
|
|||
if (debugger_enabled_ && !is_dataset_graph_) {
|
||||
auto watchpoint_table = debug_services_->GetWatchpointTable();
|
||||
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
|
||||
|
||||
// if the kernel is a watchpoint and it gets hit, suspend.
|
||||
if (is_watchpoint) {
|
||||
auto hits = CheckSingleWatchpoint(cur_name_);
|
||||
|
@ -225,6 +267,10 @@ void Debugger::PostDebugOp() {
|
|||
}
|
||||
}
|
||||
|
||||
std::map<std::pair<uint32_t, uint32_t>, std::string> &Debugger::GetStreamTaskToOpnameMap() {
|
||||
return stream_task_to_opname_;
|
||||
}
|
||||
|
||||
void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
|
||||
if (graph_ptr_ != graph_ptr) {
|
||||
MS_LOG(INFO) << "Debugger got new graph: " << graph_ptr->graph_id();
|
||||
|
@ -476,15 +522,15 @@ void Debugger::Exit() {
|
|||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
std::list<WatchpointHit> Debugger::CheckWatchpoints() const {
|
||||
std::list<WatchpointHit> Debugger::CheckWatchpoints() {
|
||||
std::vector<std::string> name;
|
||||
std::vector<std::string> slot;
|
||||
std::vector<char *> data_ptr;
|
||||
std::vector<unsigned int> data_size;
|
||||
std::vector<int> condition;
|
||||
std::vector<unsigned int> watchpoint_id;
|
||||
std::vector<std::string> overflow_ops;
|
||||
|
||||
debug_services_->CheckWatchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id);
|
||||
overflow_ops = CheckOpOverflow();
|
||||
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, overflow_ops);
|
||||
std::list<WatchpointHit> hits;
|
||||
for (unsigned int i = 0; i < name.size(); i++) {
|
||||
WatchpointHit hit;
|
||||
|
@ -658,4 +704,70 @@ void Debugger::SetStepNum(int32_t cur_num_step) {
|
|||
|
||||
int32_t Debugger::step_num() const { return num_step_; }
|
||||
|
||||
// Decodes the first eight bytes of `buffer` as a little-endian unsigned 64-bit
// integer (buffer[0] is the least significant byte).
//
// Each byte is converted through `unsigned char` before widening, so bytes
// >= 0x80 are not sign-extended into the higher-order bits on platforms where
// plain `char` is signed.
//
// buffer: raw bytes; only the first eight are used. If fewer than eight bytes
//         are supplied, the missing high-order bytes are treated as zero
//         instead of reading out of bounds.
// Returns the decoded value (0 for an empty buffer).
uint64_t BytestoInt64(const std::vector<char> &buffer) {
  constexpr std::size_t kNumBytes = sizeof(uint64_t);
  const std::size_t available = std::min(buffer.size(), kNumBytes);

  uint64_t ret = 0;
  for (std::size_t i = 0; i < available; ++i) {
    const uint64_t byte_value = static_cast<unsigned char>(buffer[i]);
    ret |= byte_value << (8 * i);
  }

  return ret;
}
|
||||
|
||||
#define BUF_SIZ 256
|
||||
std::vector<std::string> Debugger::CheckOpOverflow() {
|
||||
std::vector<double> bin_list;
|
||||
std::vector<std::string> op_names;
|
||||
DIR *d;
|
||||
struct dirent *dir;
|
||||
d = opendir(overflow_bin_path_.c_str());
|
||||
if (d) {
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_path = overflow_bin_path_;
|
||||
file_path.append(dir->d_name);
|
||||
std::string file_name = dir->d_name;
|
||||
std::size_t found = file_name.find_last_of(".");
|
||||
if (found == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
std::string overflow_time = file_name.substr(found + 1);
|
||||
if (stod(overflow_time) <= last_overflow_bin_) {
|
||||
MS_LOG(INFO) << "File already processed " << file_name;
|
||||
continue;
|
||||
}
|
||||
bin_list.push_back(stod(overflow_time));
|
||||
std::fstream infile;
|
||||
infile.open(file_path.c_str(), std::ios::binary | std::ios::in);
|
||||
infile.seekg(313, std::ios::beg);
|
||||
std::vector<char> buffer;
|
||||
buffer.resize(BUF_SIZ);
|
||||
infile.read(buffer.data(), BUF_SIZ);
|
||||
uint64_t stream_id = BytestoInt64(std::vector<char>(buffer.begin() + 8, buffer.end()));
|
||||
uint64_t task_id = BytestoInt64(std::vector<char>(buffer.begin() + 16, buffer.end()));
|
||||
MS_LOG(INFO) << "Overflow stream_id " << stream_id << ", task_id " << task_id << ".";
|
||||
auto op = debugger_->stream_task_to_opname_.find(std::make_pair(stream_id, task_id));
|
||||
if (op != debugger_->stream_task_to_opname_.end()) {
|
||||
MS_LOG(ERROR) << "Overflow detected on node " << op->second << std::endl;
|
||||
op_names.push_back(op->second);
|
||||
} else {
|
||||
MS_LOG(INFO) << "No overflow is detected " << std::endl;
|
||||
}
|
||||
infile.close();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
MS_LOG(INFO) << "OverFlow bin directory does not exist!";
|
||||
}
|
||||
closedir(d);
|
||||
MS_LOG(ERROR) << "These operation overflows are detected " << op_names;
|
||||
|
||||
for (auto &i : bin_list) {
|
||||
if (i > last_overflow_bin_) {
|
||||
last_overflow_bin_ = i;
|
||||
}
|
||||
}
|
||||
|
||||
return op_names;
|
||||
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "debug/debugger/grpc_client.h"
|
||||
#include "debug/debug_services.h"
|
||||
|
@ -90,6 +93,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
|
||||
int32_t step_num() const;
|
||||
|
||||
std::map<std::pair<uint32_t, uint32_t>, std::string> &GetStreamTaskToOpnameMap();
|
||||
|
||||
private:
|
||||
// private constructor for singleton
|
||||
Debugger();
|
||||
|
@ -130,12 +135,15 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
|
||||
// analyze tensors and check watchpoint conditions
|
||||
// return names of tensors and what condition they hit
|
||||
std::list<WatchpointHit> CheckWatchpoints() const;
|
||||
std::list<WatchpointHit> CheckWatchpoints();
|
||||
std::list<WatchpointHit> CheckSingleWatchpoint(std::string watchnode) const;
|
||||
|
||||
// send watchpoints that hit and enter command wait loop
|
||||
void SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points);
|
||||
|
||||
// Find if any operation overflow happened and return their names
|
||||
std::vector<std::string> CheckOpOverflow();
|
||||
|
||||
// class members
|
||||
std::unique_ptr<GrpcClient> grpc_client_;
|
||||
std::unique_ptr<DebugServices> debug_services_;
|
||||
|
@ -150,7 +158,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
bool is_dataset_graph_;
|
||||
bool partial_memory_;
|
||||
std::mutex access_lock_;
|
||||
|
||||
std::map<std::pair<uint32_t, uint32_t>, std::string> stream_task_to_opname_;
|
||||
double last_overflow_bin_;
|
||||
std::string overflow_bin_path_;
|
||||
// singleton
|
||||
static std::mutex instance_lock_;
|
||||
static std::shared_ptr<Debugger> debugger_;
|
||||
|
@ -180,5 +190,6 @@ ProtoVector<TensorProto> GetTensors(const EventReply &reply);
|
|||
// get the full name of a tensor, which is the name used in TensorLoader
|
||||
std::string GetTensorFullName(const TensorProto &tensor);
|
||||
|
||||
uint64_t BytestoInt64(const std::vector<char> &buffer);
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_H_
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
#include "proto/op_mapping_info.pb.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "debug/data_dump_parser.h"
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
#include "debug/debugger/debugger.h"
|
||||
#endif
|
||||
|
||||
static constexpr uint32_t kAicpuLoadFlag = 1;
|
||||
static constexpr uint32_t kAicpuUnloadFlag = 0;
|
||||
|
@ -90,6 +93,18 @@ void DataDumper::LoadDumpInfo() {
|
|||
load_flag_ = true;
|
||||
// graph id may changed in Unload
|
||||
graph_id_ = kernel_graph_->graph_id();
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
auto debugger = mindspore::Debugger::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(debugger);
|
||||
std::map<std::pair<uint32_t, uint32_t>, std::string> &stream_task_to_opname = debugger->GetStreamTaskToOpnameMap();
|
||||
// extract stream id, task id and opname from runtime_info_map for overflow detection
|
||||
std::transform(runtime_info_map_.begin(), runtime_info_map_.end(),
|
||||
std::inserter(stream_task_to_opname, stream_task_to_opname.end()),
|
||||
[](const std::pair<std::string, std::shared_ptr<RuntimeInfo>> &p)
|
||||
-> std::pair<std::pair<uint32_t, uint32_t>, std::string> {
|
||||
return {{std::get<1>(*p.second), std::get<0>(*p.second)}, p.first};
|
||||
});
|
||||
#endif
|
||||
MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue