bugfix: duplicate wp hit results in offline dbg

parent dab15fb156
commit 23eb886d11
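The underlying issue: the offline debugger matched op names against dump file names with a bare substring search, so an op whose name is a prefix of another op's name (e.g. Cast-op4 vs. Cast-op40 in the new test) also matched the longer op's files and produced duplicate watchpoint hits. Each hunk below anchors the match on the "."-separated fields of the file name instead. A minimal standalone sketch of the difference, assuming a dump file name layout of op_type.op_name.task.stream.timestamp.output.slot.npy (the exact layout is an assumption for illustration, not taken from this commit):

    #include <iostream>
    #include <string>

    int main() {
      // Hypothetical dump file names for two ops where one op name is a
      // prefix of the other (field layout assumed for illustration).
      const std::string file_op4 = "Cast.Cast-op4.0.0.1.output.0.npy";
      const std::string file_op40 = "Cast.Cast-op40.0.0.1.output.0.npy";
      const std::string dump_name = "Cast.Cast-op4";

      // Old behaviour: a bare substring search matches both files.
      std::cout << (file_op4.find(dump_name) != std::string::npos) << "\n";   // 1
      std::cout << (file_op40.find(dump_name) != std::string::npos) << "\n";  // 1 (false positive)

      // New behaviour: requiring the trailing field separator rejects the
      // longer op name, so only the intended file matches.
      std::cout << (file_op4.find(dump_name + ".") != std::string::npos) << "\n";   // 1
      std::cout << (file_op40.find(dump_name + ".") != std::string::npos) << "\n";  // 0
      return 0;
    }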
@@ -891,7 +891,7 @@ void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &f
       if (last_slash_pos != std::string::npos) {
         file_n = file_to_find.substr(last_slash_pos + 1);
       }
-      if (candidate.find(file_n) != std::string::npos && candidate.rfind(kNpyExt) != std::string::npos) {
+      if (candidate.find(file_n + ".") != std::string::npos && candidate.rfind(kNpyExt) != std::string::npos) {
         // we found a converted file for this op
         std::string found_file = dump_key + "/" + candidate;
         result_list->insert(found_file);

@@ -1044,8 +1044,8 @@ void DebugServices::GetTensorDataInfoAsync(const std::vector<std::tuple<std::str
     if (delim != std::string::npos) {
       file_name_to_check = file_name.substr(delim + 1);
     }
-    std::size_t found = file_name_to_check.find(dump_name);
-    std::size_t found_out = file_name_to_check.find(output_str);
+    std::size_t found = file_name_to_check.find("." + dump_name + ".");
+    std::size_t found_out = file_name_to_check.find(output_str, found + dump_name.length());
     std::size_t found_dot_start = file_name_to_check.find(".", found_out);
     std::size_t found_dot_end = file_name_to_check.find(".", found_dot_start);

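GetTensorDataInfoAsync now also restricts where the output marker and slot may be found: output_str is searched only after the matched op-name field, and the slot is taken from between the dots that follow it. A rough standalone sketch of that field extraction, following the same idea under the assumed file-name layout used above (this is not the debugger's actual parsing code):

    #include <iostream>
    #include <string>

    int main() {
      // Assumed async-dump file name: dot-separated fields, with the output
      // marker and slot following the op name.
      const std::string file_name = "Cast.Cast-op4.0.0.1.output.0.npy";
      const std::string dump_name = "Cast-op4";
      const std::string output_str = "output";

      // Anchor on the op name as a whole dot-delimited field, then look for
      // the output marker only after that field.
      std::size_t found = file_name.find("." + dump_name + ".");
      std::size_t found_out = file_name.find(output_str, found + dump_name.length());

      // The slot sits between the dot after the output marker and the next dot.
      std::size_t found_dot_start = file_name.find(".", found_out);
      std::size_t found_dot_end = file_name.find(".", found_dot_start + 1);
      std::string slot = file_name.substr(found_dot_start + 1, found_dot_end - found_dot_start - 1);
      std::cout << slot << "\n";  // prints "0"
      return 0;
    }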
@@ -1518,8 +1518,8 @@ void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir,
       file_name_to_check = file_path.substr(delim + 1);
     }
     if (file_path.find(specific_dump_dir) != std::string::npos &&
-        file_name_to_check.find(prefix_dump_to_check) != std::string::npos &&
-        file_name_to_check.find(slot_string_to_check) != std::string::npos) {
+        file_name_to_check.find("." + prefix_dump_to_check + ".") != std::string::npos &&
+        file_name_to_check.find(slot_string_to_check + ".") != std::string::npos) {
       matched_paths.push_back(file_path);
       found = true;
     }

@@ -1663,7 +1663,7 @@ void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::stri
     if (stripped_file_name.empty() || stripped_file_name.length() <= dump_name.length()) {
       continue;
     }
-    std::size_t found = stripped_file_name.rfind(dump_name, 0);
+    std::size_t found = stripped_file_name.rfind(dump_name + ".", 0);
     if (found == 0) {
       size_t slot = std::stoul(stripped_file_name.substr(dump_name.length() + 1));
       std::vector<int64_t> shape;

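The sync path uses rfind(prefix, 0), the usual C++ idiom for a starts-with test: searching backwards from position 0 can only succeed at position 0. With the trailing "." appended, a file for Cast-op40 no longer passes the check for dump_name Cast-op4. A small sketch of the idiom (the file names are illustrative, not the actual dump names):

    #include <iostream>
    #include <string>

    // Starts-with test via rfind: searching backwards from position 0 can only
    // succeed at index 0, i.e. when `s` begins with `prefix`.
    bool StartsWith(const std::string &s, const std::string &prefix) {
      return s.rfind(prefix, 0) == 0;
    }

    int main() {
      const std::string dump_name = "Cast-op4";
      std::cout << StartsWith("Cast-op4.0.output.npy", dump_name + ".") << "\n";   // 1
      std::cout << StartsWith("Cast-op40.0.output.npy", dump_name + ".") << "\n";  // 0
      return 0;
    }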
@@ -1,4 +1,4 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -66,9 +66,23 @@ class TestOfflineWatchpoints:
         info4 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                              slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=False)

-        tensor_info = [info1, info2, info3, info4]
-        tensor_name = [name1, name2, name2, name3]
-        tensor_list = [tensor1, tensor2, tensor3, tensor4]
+        name4 = "Cast.Cast-op4.0.0.1"
+        tensor_all_zero = np.array([[[0, 0, 0],
+                                     [0, 0, 0],
+                                     [0, 0, 0]]], np.float32)
+        info5 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/Cast-op4",
+                             slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
+
+        name5 = "Cast.Cast-op40.0.0.1"
+        tensor_all_one = np.array([[[1, 1, 1],
+                                    [1, 1, 1],
+                                    [1, 1, 1]]], np.float32)
+        info6 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/Cast-op40",
+                             slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
+
+        tensor_info = [info1, info2, info3, info4, info5, info6]
+        tensor_name = [name1, name2, name2, name3, name4, name5]
+        tensor_list = [tensor1, tensor2, tensor3, tensor4, tensor_all_zero, tensor_all_one]
         cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)

     @classmethod

@@ -181,6 +195,28 @@ class TestOfflineWatchpoints:
         assert not watchpoint_hits_test
         _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

+    @security_off_wrap
+    def test_async_watchpoints_no_duplicate_wp_hit(self):
+        """
+        Feature: Offline Debugger CheckWatchpoint.
+        Description: Test check watchpoint hit with similar op name (one is the prefix of the other)
+        Expectation: Get exactly one watchpoint hit result and no duplicate watchpoints in the hit results.
+        """
+        # watchpoint set and hit only one (watch_condition=3) in async mode
+        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
+        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
+        max_gt = d.Parameter(name="max_gt", disabled=False, value=0.0)
+        debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=3,
+                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/Cast-op4":
+                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": True
+                                                          },
+                                                         "Default/network-WithLossCell/_backbone-AlexNet/Cast-op40":
+                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": True
+                                                          }}, parameter_list=[max_gt])
+
+        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=0)
+        assert len(watchpoint_hits_test) == 1
+
     def compare_expect_actual_result(self, watchpoint_hits_list, test_index):
         """Compare actual result with golden file."""
         golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",

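For orientation on the expected result: the new test watches both Cast-op4 (an all-zero tensor) and Cast-op40 (an all-one tensor) with the max_gt parameter set to 0.0, so only the all-one tensor should satisfy the condition and exactly one hit is expected; before the fix, the prefix collision could attribute Cast-op40's dump file to Cast-op4 as well and yield duplicate hits. Conceptually the condition reduces to a maximum comparison (a minimal sketch, not the debugger's actual watchpoint implementation):

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Conceptual "max_gt" check: the watchpoint fires when the tensor's maximum
    // value exceeds the configured threshold.
    bool MaxGtHit(const std::vector<float> &tensor, float threshold) {
      return *std::max_element(tensor.begin(), tensor.end()) > threshold;
    }

    int main() {
      std::vector<float> all_zero(9, 0.0f);  // stands in for the Cast-op4 tensor
      std::vector<float> all_one(9, 1.0f);   // stands in for the Cast-op40 tensor
      std::cout << MaxGtHit(all_zero, 0.0f) << "\n";  // 0: no hit
      std::cout << MaxGtHit(all_one, 0.0f) << "\n";   // 1: the single expected hit
      return 0;
    }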