forked from mindspore-Ecosystem/mindspore
!23047 23046 Fix offline debugger read-tensor bug and online debugger overflow and check-watchpoint bugs (r1.4)
Merge pull request !23047 from parastooashtari/overflow_bug_1.4
This commit is contained in:
commit
683439f0dd
|
@ -759,7 +759,7 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
|
||||||
std::string slot_string_to_check;
|
std::string slot_string_to_check;
|
||||||
std::string prefix_dump_file_name;
|
std::string prefix_dump_file_name;
|
||||||
SetPrefixToCheck(&prefix_dump_file_name, &slot_string_to_check, &dump_style_kernel_name, slot[i], is_output[i]);
|
SetPrefixToCheck(&prefix_dump_file_name, &slot_string_to_check, &dump_style_kernel_name, slot[i], is_output[i]);
|
||||||
std::string prefix_dump_to_check = dump_style_kernel_name;
|
std::string prefix_dump_to_check = dump_style_kernel_name + '.';
|
||||||
GetNodeNameWithoutScope(&prefix_dump_to_check);
|
GetNodeNameWithoutScope(&prefix_dump_to_check);
|
||||||
|
|
||||||
std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" +
|
std::string specific_dump_dir = dump_dir + "/rank_" + std::to_string(device_id[i]) + "/" + net_name + "/" +
|
||||||
|
@ -1092,12 +1092,12 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int
|
||||||
#ifdef ONLINE_DBG_MODE
|
#ifdef ONLINE_DBG_MODE
|
||||||
auto debugger = Debugger::GetInstance();
|
auto debugger = Debugger::GetInstance();
|
||||||
overflow_bin_path = DumpJsonParser::GetInstance().GetOpOverflowBinPath(debugger->GetGraphPtr()->graph_id());
|
overflow_bin_path = DumpJsonParser::GetInstance().GetOpOverflowBinPath(debugger->GetGraphPtr()->graph_id());
|
||||||
auto realpath = Common::GetRealPath(overflow_bin_path);
|
std::string check_overflow_bin_path = RealPath(overflow_bin_path);
|
||||||
if (!realpath.has_value()) {
|
if (check_overflow_bin_path.empty()) {
|
||||||
MS_LOG(ERROR) << "Get real path failed for overflow_bin_path.";
|
MS_LOG(WARNING) << "Get real path failed for overflow_bin_path.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
overflow_bin_path = realpath.value();
|
overflow_bin_path = check_overflow_bin_path;
|
||||||
#else
|
#else
|
||||||
overflow_bin_path = dump_dir + "/rank_" + std::to_string(device_id) + "/" + net_name + "/" +
|
overflow_bin_path = dump_dir + "/rank_" + std::to_string(device_id) + "/" + net_name + "/" +
|
||||||
std::to_string(root_graph_id) + "/" + IterationString(iteration) + "/";
|
std::to_string(root_graph_id) + "/" + IterationString(iteration) + "/";
|
||||||
|
@ -1279,7 +1279,7 @@ std::string DebugServices::RealPath(const std::string &input_path) {
|
||||||
MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX;
|
MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX;
|
||||||
}
|
}
|
||||||
if (realpath(prefix_path.c_str(), real_path) == nullptr) {
|
if (realpath(prefix_path.c_str(), real_path) == nullptr) {
|
||||||
MS_LOG(ERROR) << "The dir " << prefix_path << " does not exist.";
|
MS_LOG(WARNING) << "The dir " << prefix_path << " does not exist.";
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -136,6 +136,10 @@ class DebugServices {
|
||||||
bool check_tensor_name = found != std::string::npos && w_name.substr(found + 1) == tensor_name;
|
bool check_tensor_name = found != std::string::npos && w_name.substr(found + 1) == tensor_name;
|
||||||
bool check_node_name = (w_type && (node_name == w_name || w_name == "*")) || (!w_type && node_name == w_name);
|
bool check_node_name = (w_type && (node_name == w_name || w_name == "*")) || (!w_type && node_name == w_name);
|
||||||
if (check_tensor_name || check_node_name) {
|
if (check_tensor_name || check_node_name) {
|
||||||
|
// The online debugger supports only a single card.
|
||||||
|
if (check_node_device_list.empty()) {
|
||||||
|
return w_name;
|
||||||
|
}
|
||||||
auto device_vec = std::get<1>(check_node_device_list[indx]);
|
auto device_vec = std::get<1>(check_node_device_list[indx]);
|
||||||
auto root_graph_vec = std::get<1>(check_node_graph_list[indx]);
|
auto root_graph_vec = std::get<1>(check_node_graph_list[indx]);
|
||||||
auto iter1 = std::find(device_vec.begin(), device_vec.end(), tensor_device_id);
|
auto iter1 = std::find(device_vec.begin(), device_vec.end(), tensor_device_id);
|
||||||
|
|
Loading…
Reference in New Issue