bugfix: duplicate watchpoint (wp) hit results in the offline debugger
This commit is contained in:
parent
dab15fb156
commit
23eb886d11
|
@ -891,7 +891,7 @@ void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &f
|
||||||
if (last_slash_pos != std::string::npos) {
|
if (last_slash_pos != std::string::npos) {
|
||||||
file_n = file_to_find.substr(last_slash_pos + 1);
|
file_n = file_to_find.substr(last_slash_pos + 1);
|
||||||
}
|
}
|
||||||
if (candidate.find(file_n) != std::string::npos && candidate.rfind(kNpyExt) != std::string::npos) {
|
if (candidate.find(file_n + ".") != std::string::npos && candidate.rfind(kNpyExt) != std::string::npos) {
|
||||||
// we found a converted file for this op
|
// we found a converted file for this op
|
||||||
std::string found_file = dump_key + "/" + candidate;
|
std::string found_file = dump_key + "/" + candidate;
|
||||||
result_list->insert(found_file);
|
result_list->insert(found_file);
|
||||||
|
@ -1044,8 +1044,8 @@ void DebugServices::GetTensorDataInfoAsync(const std::vector<std::tuple<std::str
|
||||||
if (delim != std::string::npos) {
|
if (delim != std::string::npos) {
|
||||||
file_name_to_check = file_name.substr(delim + 1);
|
file_name_to_check = file_name.substr(delim + 1);
|
||||||
}
|
}
|
||||||
std::size_t found = file_name_to_check.find(dump_name);
|
std::size_t found = file_name_to_check.find("." + dump_name + ".");
|
||||||
std::size_t found_out = file_name_to_check.find(output_str);
|
std::size_t found_out = file_name_to_check.find(output_str, found + dump_name.length());
|
||||||
std::size_t found_dot_start = file_name_to_check.find(".", found_out);
|
std::size_t found_dot_start = file_name_to_check.find(".", found_out);
|
||||||
std::size_t found_dot_end = file_name_to_check.find(".", found_dot_start);
|
std::size_t found_dot_end = file_name_to_check.find(".", found_dot_start);
|
||||||
|
|
||||||
|
@ -1518,8 +1518,8 @@ void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir,
|
||||||
file_name_to_check = file_path.substr(delim + 1);
|
file_name_to_check = file_path.substr(delim + 1);
|
||||||
}
|
}
|
||||||
if (file_path.find(specific_dump_dir) != std::string::npos &&
|
if (file_path.find(specific_dump_dir) != std::string::npos &&
|
||||||
file_name_to_check.find(prefix_dump_to_check) != std::string::npos &&
|
file_name_to_check.find("." + prefix_dump_to_check + ".") != std::string::npos &&
|
||||||
file_name_to_check.find(slot_string_to_check) != std::string::npos) {
|
file_name_to_check.find(slot_string_to_check + ".") != std::string::npos) {
|
||||||
matched_paths.push_back(file_path);
|
matched_paths.push_back(file_path);
|
||||||
found = true;
|
found = true;
|
||||||
}
|
}
|
||||||
|
@ -1663,7 +1663,7 @@ void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::stri
|
||||||
if (stripped_file_name.empty() || stripped_file_name.length() <= dump_name.length()) {
|
if (stripped_file_name.empty() || stripped_file_name.length() <= dump_name.length()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
std::size_t found = stripped_file_name.rfind(dump_name, 0);
|
std::size_t found = stripped_file_name.rfind(dump_name + ".", 0);
|
||||||
if (found == 0) {
|
if (found == 0) {
|
||||||
size_t slot = std::stoul(stripped_file_name.substr(dump_name.length() + 1));
|
size_t slot = std::stoul(stripped_file_name.substr(dump_name.length() + 1));
|
||||||
std::vector<int64_t> shape;
|
std::vector<int64_t> shape;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
# Copyright 2021-2022 Huawei Technologies Co., Ltd
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -66,9 +66,23 @@ class TestOfflineWatchpoints:
|
||||||
info4 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
|
info4 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
|
||||||
slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
|
slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
|
||||||
|
|
||||||
tensor_info = [info1, info2, info3, info4]
|
name4 = "Cast.Cast-op4.0.0.1"
|
||||||
tensor_name = [name1, name2, name2, name3]
|
tensor_all_zero = np.array([[[0, 0, 0],
|
||||||
tensor_list = [tensor1, tensor2, tensor3, tensor4]
|
[0, 0, 0],
|
||||||
|
[0, 0, 0]]], np.float32)
|
||||||
|
info5 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/Cast-op4",
|
||||||
|
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||||
|
|
||||||
|
name5 = "Cast.Cast-op40.0.0.1"
|
||||||
|
tensor_all_one = np.array([[[1, 1, 1],
|
||||||
|
[1, 1, 1],
|
||||||
|
[1, 1, 1]]], np.float32)
|
||||||
|
info6 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/Cast-op40",
|
||||||
|
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||||
|
|
||||||
|
tensor_info = [info1, info2, info3, info4, info5, info6]
|
||||||
|
tensor_name = [name1, name2, name2, name3, name4, name5]
|
||||||
|
tensor_list = [tensor1, tensor2, tensor3, tensor4, tensor_all_zero, tensor_all_one]
|
||||||
cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)
|
cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -181,6 +195,28 @@ class TestOfflineWatchpoints:
|
||||||
assert not watchpoint_hits_test
|
assert not watchpoint_hits_test
|
||||||
_ = debugger_backend.remove_watchpoint(watchpoint_id=2)
|
_ = debugger_backend.remove_watchpoint(watchpoint_id=2)
|
||||||
|
|
||||||
|
@security_off_wrap
|
||||||
|
def test_async_watchpoints_no_duplicate_wp_hit(self):
|
||||||
|
"""
|
||||||
|
Feature: Offline Debugger CheckWatchpoint.
|
||||||
|
Description: Test check watchpoint hit with similar op name (one is the prefix of the other)
|
||||||
|
Expectation: Get exactly one watchpoint hit result and no duplicate watchpoints in the hit results.
|
||||||
|
"""
|
||||||
|
# set a watchpoint (watch_condition=3) in async mode; exactly one hit is expected
|
||||||
|
debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
|
||||||
|
_ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
|
||||||
|
max_gt = d.Parameter(name="max_gt", disabled=False, value=0.0)
|
||||||
|
debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=3,
|
||||||
|
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/Cast-op4":
|
||||||
|
{"rank_id": [0], "root_graph_id": [0], "is_output": True
|
||||||
|
},
|
||||||
|
"Default/network-WithLossCell/_backbone-AlexNet/Cast-op40":
|
||||||
|
{"rank_id": [0], "root_graph_id": [0], "is_output": True
|
||||||
|
}}, parameter_list=[max_gt])
|
||||||
|
|
||||||
|
watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=0)
|
||||||
|
assert len(watchpoint_hits_test) == 1
|
||||||
|
|
||||||
def compare_expect_actual_result(self, watchpoint_hits_list, test_index):
|
def compare_expect_actual_result(self, watchpoint_hits_list, test_index):
|
||||||
"""Compare actual result with golden file."""
|
"""Compare actual result with golden file."""
|
||||||
golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
|
golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
|
||||||
|
|
Loading…
Reference in New Issue