!23047 23046 Fix offline debugger tensor reading, online debugger overflow check, and watchpoint check bugs (r1.4)

Merge pull request !23047 from parastooashtari/overflow_bug_1.4
This commit is contained in:
i-robot 2021-09-16 14:04:25 +00:00 committed by Gitee
commit 683439f0dd
2 changed files with 10 additions and 6 deletions

View File

@ -759,7 +759,7 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
std::string slot_string_to_check; std::string slot_string_to_check;
std::string prefix_dump_file_name; std::string prefix_dump_file_name;
SetPrefixToCheck(&prefix_dump_file_name, &slot_string_to_check, &dump_style_kernel_name, slot[i], is_output[i]); SetPrefixToCheck(&prefix_dump_file_name, &slot_string_to_check, &dump_style_kernel_name, slot[i], is_output[i]);
std::string prefix_dump_to_check = dump_style_kernel_name; std::string prefix_dump_to_check = dump_style_kernel_name + '.';
GetNodeNameWithoutScope(&prefix_dump_to_check); GetNodeNameWithoutScope(&prefix_dump_to_check);
std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" + std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" +
@ -1092,12 +1092,12 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int
#ifdef ONLINE_DBG_MODE #ifdef ONLINE_DBG_MODE
auto debugger = Debugger::GetInstance(); auto debugger = Debugger::GetInstance();
overflow_bin_path = DumpJsonParser::GetInstance().GetOpOverflowBinPath(debugger->GetGraphPtr()->graph_id()); overflow_bin_path = DumpJsonParser::GetInstance().GetOpOverflowBinPath(debugger->GetGraphPtr()->graph_id());
auto realpath = Common::GetRealPath(overflow_bin_path); std::string check_overflow_bin_path = RealPath(overflow_bin_path);
if (!realpath.has_value()) { if (check_overflow_bin_path.empty()) {
MS_LOG(ERROR) << "Get real path failed for overflow_bin_path."; MS_LOG(WARNING) << "Get real path failed for overflow_bin_path.";
return false; return false;
} }
overflow_bin_path = realpath.value(); overflow_bin_path = check_overflow_bin_path;
#else #else
overflow_bin_path = dump_dir + "/rank_" + std::to_string(device_id) + "/" + net_name + "/" + overflow_bin_path = dump_dir + "/rank_" + std::to_string(device_id) + "/" + net_name + "/" +
std::to_string(root_graph_id) + "/" + IterationString(iteration) + "/"; std::to_string(root_graph_id) + "/" + IterationString(iteration) + "/";
@ -1279,7 +1279,7 @@ std::string DebugServices::RealPath(const std::string &input_path) {
MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX; MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX;
} }
if (realpath(prefix_path.c_str(), real_path) == nullptr) { if (realpath(prefix_path.c_str(), real_path) == nullptr) {
MS_LOG(ERROR) << "The dir " << prefix_path << " does not exist."; MS_LOG(WARNING) << "The dir " << prefix_path << " does not exist.";
return ""; return "";
} }

View File

@ -136,6 +136,10 @@ class DebugServices {
bool check_tensor_name = found != std::string::npos && w_name.substr(found + 1) == tensor_name; bool check_tensor_name = found != std::string::npos && w_name.substr(found + 1) == tensor_name;
bool check_node_name = (w_type && (node_name == w_name || w_name == "*")) || (!w_type && node_name == w_name); bool check_node_name = (w_type && (node_name == w_name || w_name == "*")) || (!w_type && node_name == w_name);
if (check_tensor_name || check_node_name) { if (check_tensor_name || check_node_name) {
// online debugger only supports a single card
if (check_node_device_list.empty()) {
return w_name;
}
auto device_vec = std::get<1>(check_node_device_list[indx]); auto device_vec = std::get<1>(check_node_device_list[indx]);
auto root_graph_vec = std::get<1>(check_node_graph_list[indx]); auto root_graph_vec = std::get<1>(check_node_graph_list[indx]);
auto iter1 = std::find(device_vec.begin(), device_vec.end(), tensor_device_id); auto iter1 = std::find(device_vec.begin(), device_vec.end(), tensor_device_id);