!22638 Add ut test for offline debugger

Merge pull request !22638 from sabrinasun_59ee/ut
This commit is contained in:
i-robot 2021-09-15 06:28:47 +00:00 committed by Gitee
commit 0d914dd427
18 changed files with 1340 additions and 735 deletions

View File

@ -255,7 +255,7 @@ void DebugServices::AddAnalyzedTensorToCache(const bool recheck, const unsigned
// add analyzed tensor to cache // add analyzed tensor to cache
if (!recheck) { if (!recheck) {
wp_lock_.lock(); wp_lock_.lock();
wp_id_cache_[tensor_name].insert(id); (void)wp_id_cache_[tensor_name].insert(id);
wp_lock_.unlock(); wp_lock_.unlock();
} }
} }

View File

@ -43,7 +43,7 @@ class DbgLogger {
if (dbg_log_path != nullptr) { if (dbg_log_path != nullptr) {
char abspath[PATH_MAX]; char abspath[PATH_MAX];
if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) { if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) {
return; std::cout << "ERROR: DbgLogger could not create real path";
} }
FILE *fp = freopen(abspath, "a", stdout); FILE *fp = freopen(abspath, "a", stdout);
if (fp == nullptr) { if (fp == nullptr) {

View File

@ -0,0 +1,150 @@
[
{
"test1": {
"tensor_info": {
"node_name": "Default/Add-op4",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
]
},
"tensor_stat_info": {
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
],
"is_bool": false,
"max_vaue": 10.0,
"min_value": -11.0,
"avg_value": 0.880000114440918,
"count": 6,
"neg_zero_count": 2,
"pos_zero_count": 3,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0,
"zero_count": 1
}
}
},
{
"test2": {
"tensor_info": {
"node_name": "Default/Reciprocal-op3",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 40,
"debugger_dtype": 11,
"shape": [
2,
5
]
},
"tensor_stat_info": {
"size_in_bytes": 40,
"debugger_dtype": 11,
"shape": [
2,
5
],
"is_bool": false,
"max_vaue": 1.0,
"min_value": 1.0,
"avg_value": 1.0,
"count": 10,
"neg_zero_count": 0,
"pos_zero_count": 2,
"nan_count": 0,
"neg_inf_count": 3,
"pos_inf_count": 5,
"zero_count": 0
}
}
},
{
"test3": {
"tensor_info": {
"node_name": "Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 20,
"debugger_dtype": 11,
"shape": [
5
]
},
"tensor_stat_info": {
"size_in_bytes": 20,
"debugger_dtype": 11,
"shape": [
5
],
"is_bool": false,
"max_vaue": 1.9901361465454102,
"min_value": -2.175431728363037,
"avg_value": -0.6648297309875488,
"count": 5,
"neg_zero_count": 2,
"pos_zero_count": 1,
"nan_count": 2,
"neg_inf_count": 0,
"pos_inf_count": 0,
"zero_count": 0
}
}
},
{
"test4": {
"tensor_info": {
"node_name": "invalid_name_for_test",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
},
"tensor_stat_info": {
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": [],
"is_bool": false,
"max_vaue": -1.7976931348623157e+308,
"min_value": 1.7976931348623157e+308,
"avg_value": 0.0,
"count": 0,
"neg_zero_count": 0,
"pos_zero_count": 0,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0,
"zero_count": 0
}
}
}
]

View File

@ -0,0 +1,315 @@
[
{
"tensor_1": {
"tensor_info": {
"node_name": "Default/CudnnUniformReal-op391",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
66,
0,
0,
128,
69
],
"size_in_bytes": 8,
"debugger_dtype": 11,
"shape": [
2
]
}
}
},
{
"tensor_2": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
0,
0,
0,
0,
66,
0,
0,
128,
69,
0,
0,
144,
64,
195,
245,
216,
64,
0,
0,
48,
193
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
]
}
}
},
{
"tensor_3": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424",
"slot": 0,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
8,
255,
166,
56,
189,
58,
71,
56,
103,
3,
217,
55,
170,
225,
174,
56,
135,
195,
82,
56,
54,
253,
225,
55,
254,
158,
179,
56,
33,
66,
88,
56,
30,
248,
222,
55,
241,
32,
168,
56,
143,
126,
73,
56,
116,
129,
228,
55,
53,
254,
175,
56,
2,
0,
87,
56,
246,
124,
238,
55,
177,
160,
180,
56,
156,
126,
92,
56,
144,
121,
236,
55,
117,
189,
159,
56,
25,
132,
32,
56,
154,
1,
178,
54,
187,
189,
156,
56,
117,
252,
27,
56,
205,
2,
76,
54,
212,
127,
148,
56,
129,
1,
12,
56,
53,
253,
11,
182
],
"size_in_bytes": 108,
"debugger_dtype": 11,
"shape": [
3,
3,
3
]
}
}
},
{
"tensor_4": {
"tensor_info": {
"node_name": "Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
"slot": 1,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
104,
60,
33,
79,
53,
6,
131,
78,
78,
232,
126,
79,
154,
198,
85,
79,
245,
52,
84,
78,
70,
207,
222,
78
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
6
]
}
}
},
{
"tensor_5": {
"tensor_info": {
"node_name": "Default/Reciprocal-op3",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
0,
0,
128,
63,
0,
0,
128,
255,
0,
0,
128,
127,
0,
0,
128,
255,
0,
0,
128,
127,
0,
0,
128,
127,
0,
0,
128,
63,
0,
0,
128,
255,
0,
0,
128,
127,
0,
0,
128,
127
],
"size_in_bytes": 40,
"debugger_dtype": 11,
"shape": [
2,
5
]
}
}
}
]

View File

@ -0,0 +1,56 @@
[
{
"tensor_1": {
"tensor_info": {
"node_name": "Default/CudnnUniformReal-op390",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [],
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
}
}
},
{
"tensor_2": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [],
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
}
}
},
{
"tensor_3": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [],
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
}
}
}
]

View File

@ -1,79 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/CudnnUniformReal-op391
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_1 attributes:
data (printed in uint8) = [ 0 0 0 66 0 0 128 69]
size in bytes = 8
debugger dtype = 11
shape = [2]
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406
slot = 1
iteration = 1
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_2 attributes:
data (printed in uint8) = [ 0 0 0 0 0 0 0 66 0 0 128 69 0 0 144 64 195 245
216 64 0 0 48 193]
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424
slot = 0
iteration = 1
rank_id = 0
root_graph_id = 0
is_output = True
tensor_data_3 attributes:
data (printed in uint8) = [ 8 255 166 56 189 58 71 56 103 3 217 55 170 225 174 56 135 195
82 56 54 253 225 55 254 158 179 56 33 66 88 56 30 248 222 55
241 32 168 56 143 126 73 56 116 129 228 55 53 254 175 56 2 0
87 56 246 124 238 55 177 160 180 56 156 126 92 56 144 121 236 55
117 189 159 56 25 132 32 56 154 1 178 54 187 189 156 56 117 252
27 56 205 2 76 54 212 127 148 56 129 1 12 56 53 253 11 182]
size in bytes = 108
debugger dtype = 11
shape = [3, 3, 3]
-----------------------------------------------------------
tensor_info_4 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381
slot = 1
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_data_4 attributes:
data (printed in uint8) = [104 60 33 79 53 6 131 78 78 232 126 79 154 198 85 79 245 52
84 78 70 207 222 78]
size in bytes = 24
debugger dtype = 11
shape = [6]
-----------------------------------------------------------
tensor_info_5 attributes:
node name = Default/Reciprocal-op3
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_data_5 attributes:
data (printed in uint8) = [ 0 0 128 63 0 0 128 255 0 0 128 127 0 0 128 255 0 0
128 127 0 0 128 127 0 0 128 63 0 0 128 255 0 0 128 127
0 0 128 127]
size in bytes = 40
debugger dtype = 11
shape = [2, 5]

View File

@ -1,120 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/Add-op4
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
tensor_stat_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
is_bool = False
max_value = 10.0
min_value = -11.0
avg_value = 0.880000114440918
count = 6
neg_zero_count = 2
pos_zero_count = 3
nan_count = 0
neg_inf_count = 0
pos_inf_count = 0
zero_count = 1
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/Reciprocal-op3
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]
tensor_stat_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]
is_bool = False
max_value = 1.0
min_value = 1.0
avg_value = 1.0
count = 10
neg_zero_count = 0
pos_zero_count = 2
nan_count = 0
neg_inf_count = 3
pos_inf_count = 5
zero_count = 0
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 20
debugger dtype = 11
shape = [5]
tensor_stat_info:
size in bytes = 20
debugger dtype = 11
shape = [5]
is_bool = False
max_value = 1.9901361465454102
min_value = -2.175431728363037
avg_value = -0.6648297309875488
count = 5
neg_zero_count = 2
pos_zero_count = 1
nan_count = 2
neg_inf_count = 0
pos_inf_count = 0
zero_count = 0
-----------------------------------------------------------
tensor_info_4 attributes:
node name = invalid_name_for_test
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 0
debugger dtype = 0
shape = []
tensor_stat_info:
size in bytes = 0
debugger dtype = 0
shape = []
is_bool = False
max_value = -1.7976931348623157e+308
min_value = 1.7976931348623157e+308
avg_value = 0.0
count = 0
neg_zero_count = 0
pos_zero_count = 0
nan_count = 0
neg_inf_count = 0
pos_inf_count = 0
zero_count = 0

View File

@ -1,28 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/CudnnUniformReal-op390
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_1 attributes:
data (printed in uint8) = []
size in bytes = 0
debugger dtype = 0
shape = []
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406
slot = 1
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_2 attributes:
data (printed in uint8) = []
size in bytes = 0
debugger dtype = 0
shape = []

View File

@ -1,33 +0,0 @@
-----------------------------------------------------------
watchpoint_hit for test_1 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369
slot = 1
condition = 6
watchpoint_id = 1
parameter 0 name = param
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = -0.020966000854969025
error code = 0
rank_id = 0
root_graph_id = 0
-----------------------------------------------------------
watchpoint_hit for test_4 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias
slot = 0
condition = 18
watchpoint_id = 3
parameter 0 name = abs_mean_update_ratio_gt
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = 1.0156775705209766
parameter 1 name = epsilon
parameter 1 disabled = True
parameter 1 value = 0.0
parameter 1 hit = False
parameter 1 actual_value = 0.0
error code = 0
rank_id = 0
root_graph_id = 0

View File

@ -0,0 +1,77 @@
[
{
"watchpoint_hit1": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
"slot": 1,
"condition": 6,
"watchpoint_id": 1,
"paremeter": [
{
"parameter0": {
"name": "param",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": -0.020966000854969025
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
},
{
"watchpoint_hit2": {
"name": "Default/CudnnUniformReal-op391",
"slot": 0,
"condition": 6,
"watchpoint_id": 2,
"paremeter": [
{
"parameter0": {
"name": "param",
"disabled": false,
"value": 10.0,
"hit": true,
"actual_value": -4096.0
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
},
{
"watchpoint_hit3": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
"slot": 0,
"condition": 18,
"watchpoint_id": 3,
"paremeter": [
{
"parameter0": {
"name": "abs_mean_update_ratio_gt",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": 1.0156775705209766
}
},
{
"parameter1": {
"name": "epsilon",
"disabled": true,
"value": 0.0,
"hit": false,
"actual_value": 0.0
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
}
]

View File

@ -16,20 +16,11 @@
Utils for testing offline debugger. Utils for testing offline debugger.
""" """
import filecmp
import os import os
import tempfile import tempfile
import numpy as np import numpy as np
def compare_actual_with_expected(test_name):
"""Compare actual file with expected."""
is_eq = filecmp.cmp("../data/dump/gpu_dumps/golden/" +
test_name + ".expected", test_name + ".actual", shallow=False)
if os.path.exists(test_name + ".actual"):
os.remove(test_name + ".actual")
return is_eq
def build_dump_structure(tensor_name_list, tensor_list, net_name, tensor_info_list): def build_dump_structure(tensor_name_list, tensor_list, net_name, tensor_info_list):
"""Build dump file structure from tensor_list.""" """Build dump file structure from tensor_list."""
temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./") temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./")

View File

@ -0,0 +1,180 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import os
import shutil
import json
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap
class TestOfflineReadTensor:
    """Test read_tensors for the offline debugger.

    setup_class builds two synthetic dump trees with build_dump_structure:
    a four-tensor net named "Test" and a one-tensor inf/-inf net named
    "Inf".  Each test reads the tensors back through dbg_services and
    compares the raw bytes against the golden file
    ../data/dump/gpu_dumps/golden/read_tensors_expected.json.
    """

    # When True, tests accumulate results into tensor_json and the final
    # sync test rewrites the golden file instead of comparing against it.
    GENERATE_GOLDEN = False
    test_name = "read_tensors"
    # Class-level accumulator: golden entries tensor_1..tensor_5 are
    # collected across the two sync tests and dumped as one JSON list.
    tensor_json = []
    temp_dir = ''

    @classmethod
    def setup_class(cls):
        """Init setup for offline read tensor test"""
        # input tensor with zero slot
        tensor1 = np.array([32.0, 4096.0], np.float32)
        name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
        info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                             slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        # input tensor with non-zero slot
        tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
        name2 = "ReluGradV2.ReluGradV2-op406.0.0."
        info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                             "gradReLU/ReluGradV2-op406",
                             slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
        # output tensor with zero slot
        tensor3 = np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
                             [8.339e-05, 5.025e-05, 2.694e-05],
                             [8.565e-05, 5.156e-05, 2.658e-05]],
                            [[8.017e-05, 4.804e-05, 2.724e-05],
                             [8.392e-05, 5.126e-05, 2.843e-05],
                             [8.613e-05, 5.257e-05, 2.819e-05]],
                            [[7.617e-05, 3.827e-05, 5.305e-06],
                             [7.474e-05, 3.719e-05, 3.040e-06],
                             [7.081e-05, 3.338e-05, -2.086e-06]]], np.float32)
        name3 = "Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0."
        info3 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
                             "gradConv2D/Conv2DBackpropFilter-op424",
                             slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
        # output tensor with non-zero slot (large integer bit patterns
        # stored as float32 on purpose; only raw bytes are compared)
        tensor4 = np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32)
        name4 = "ReLUV2.ReLUV2-op381.0.0."
        info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
                             slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
        tensor_name = [name1, name2, name3, name4]
        tensor_list = [tensor1, tensor2, tensor3, tensor4]
        cls.tensor_info = [info1, info2, info3, info4]
        cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", cls.tensor_info)
        # inf tensor: exercises +/-inf byte round-tripping in a separate dump
        inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf],
                               [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
        inf_name = "Reciprocal.Reciprocal-op3.0.0."
        cls.inf_info = d.TensorInfo(node_name="Default/Reciprocal-op3",
                                    slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
        cls.inf_dir = build_dump_structure([inf_name], [inf_tensor], "Inf", [cls.inf_info])

    @classmethod
    def teardown_class(cls):
        """Run after test this class."""
        shutil.rmtree(cls.temp_dir)
        shutil.rmtree(cls.inf_dir)

    @security_off_wrap
    def test_sync_read_tensors(self):
        """Read the four "Test" tensors in sync mode (golden entries 1-4)."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        tensor_data = debugger_backend.read_tensors(self.tensor_info)
        if self.GENERATE_GOLDEN:
            # Accumulate only; the inf sync test below flushes the file.
            self.print_read_tensors(self.tensor_info, tensor_data, 0, False)
        else:
            self.compare_expect_actual_result(self.tensor_info, tensor_data, 0)

    @security_off_wrap
    def test_sync_read_inf_tensors(self):
        """Read the "Inf" tensor in sync mode (golden entry 5)."""
        debugger_backend = d.DbgServices(dump_file_path=self.inf_dir)
        _ = debugger_backend.initialize(net_name="Inf", is_sync_mode=True)
        tensor_data_inf = debugger_backend.read_tensors([self.inf_info])
        if self.GENERATE_GOLDEN:
            # BUGFIX: this is the last golden-producing test, so is_print must
            # be True; with False (as before) the accumulated tensor_json was
            # never written and regeneration produced no golden file at all.
            self.print_read_tensors([self.inf_info], tensor_data_inf, 4, True)
        else:
            self.compare_expect_actual_result([self.inf_info], tensor_data_inf, 4)

    @security_off_wrap
    def test_async_read_tensors(self):
        """Read the four "Test" tensors in async mode; same golden entries 1-4."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
        tensor_data = debugger_backend.read_tensors(self.tensor_info)
        if not self.GENERATE_GOLDEN:
            self.compare_expect_actual_result(self.tensor_info, tensor_data, 0)

    @security_off_wrap
    def test_async_read_inf_tensors(self):
        """Read the "Inf" tensor in async mode; same golden entry 5."""
        debugger_backend = d.DbgServices(dump_file_path=self.inf_dir)
        _ = debugger_backend.initialize(net_name="Inf", is_sync_mode=False)
        tensor_data_inf = debugger_backend.read_tensors([self.inf_info])
        if not self.GENERATE_GOLDEN:
            self.compare_expect_actual_result([self.inf_info], tensor_data_inf, 4)

    def compare_expect_actual_result(self, tensor_info_list, tensor_data_list, test_index):
        """Compare actual result with golden file.

        test_index is the 0-based offset of the first expected entry, so
        entry keys are "tensor_<test_index+position+1>".
        """
        golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
                                                    self.test_name + "_expected.json"))
        with open(golden_file) as f:
            expected_list = json.load(f)
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            test_id = "tensor_" + str(test_index+x+1)
            info = expected_list[x+test_index][test_id]
            assert tensor_info.node_name == info['tensor_info']['node_name']
            assert tensor_info.slot == info['tensor_info']['slot']
            assert tensor_info.iteration == info['tensor_info']['iteration']
            assert tensor_info.rank_id == info['tensor_info']['rank_id']
            assert tensor_info.root_graph_id == info['tensor_info']['root_graph_id']
            assert tensor_info.is_output == info['tensor_info']['is_output']
            # Compare raw bytes, dtype-agnostic: golden "data" is a uint8 list.
            actual_data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            assert actual_data == info['tensor_data']['data']
            assert tensor_data.data_size == info['tensor_data']['size_in_bytes']
            assert tensor_data.dtype == info['tensor_data']['debugger_dtype']
            assert tensor_data.shape == info['tensor_data']['shape']

    def print_read_tensors(self, tensor_info_list, tensor_data_list, test_index, is_print):
        """Print read tensors result if GENERATE_GOLDEN is True.

        Appends one entry per tensor to cls.tensor_json; when is_print is
        True, dumps the whole accumulated list to <test_name>_expected.json.
        """
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            tensor = "tensor_" + str(test_index+x+1)
            data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            # Sanity check: C++-reported size must match the Python buffer.
            py_byte_size = len(tensor_data.data_ptr)
            c_byte_size = tensor_data.data_size
            if c_byte_size != py_byte_size:
                print("The python byte size of " + str(py_byte_size) +
                      " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            self.tensor_json.append({
                tensor: {
                    'tensor_info': {
                        'node_name': tensor_info.node_name,
                        'slot': tensor_info.slot,
                        'iteration': tensor_info.iteration,
                        'rank_id': tensor_info.rank_id,
                        'root_graph_id': tensor_info.root_graph_id,
                        'is_output': tensor_info.is_output
                    },
                    'tensor_data': {
                        'data': data,
                        'size_in_bytes': tensor_data.data_size,
                        'debugger_dtype': tensor_data.dtype,
                        'shape': tensor_data.shape
                    }
                }
            })
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -0,0 +1,164 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import os
import json
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap
class TestOfflineReadNonExistTensor:
    """Test reading non exist tensor for offline debugger

    Builds a two-tensor dump ("Test" net) and then queries tensors that do
    not exist (wrong op name, wrong iteration, wrong is_output).  Expected
    behaviour, pinned by the golden file, is one empty result per query:
    data == [], size_in_bytes == 0, debugger_dtype == 0, shape == [].
    """
    # When True, tests regenerate the golden JSON instead of comparing.
    GENERATE_GOLDEN = False
    test_name = "read_tensors_nonexist_node"
    # Accumulates golden entries tensor_1..tensor_3 across the three tests.
    tensor_json = []
    temp_dir = ''

    @classmethod
    def setup_class(cls):
        """Init setup for offline read tensor test"""
        tensor1 = np.array([32.0, 4096.0], np.float32)
        name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
        info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                             slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
        name2 = "ReluGradV2.ReluGradV2-op406.0.0."
        info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet"
                             "/gradReLU/ReluGradV2-op406",
                             slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
        tensor_name = [name1, name2]
        tensor_info = [info1, info2]
        tensor_list = [tensor1, tensor2]
        cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)

    @classmethod
    def teardown_class(cls):
        # Remove the temporary dump tree created in setup_class.
        shutil.rmtree(cls.temp_dir)

    @security_off_wrap
    def test_read_tensors_wrong_op_name(self):
        """Querying an op name absent from the dump yields one empty tensor."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(
            net_name="Test", is_sync_mode=True)
        # non-existing tensor with wrong op name
        # (op390 was never dumped; only op391 exists)
        info_nonexist = d.TensorInfo(node_name="Default/CudnnUniformReal-op390",
                                     slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        tensor_data = debugger_backend.read_tensors([info_nonexist])
        # Check the length of tensor data
        assert len(tensor_data) == 1
        if self.GENERATE_GOLDEN:
            # Accumulate only (golden entry 1); last test flushes the file.
            self.print_read_tensors([info_nonexist], tensor_data, 0, False)
        else:
            self.compare_expect_actual_result([info_nonexist], tensor_data, 0)

    @security_off_wrap
    def test_read_tensors_wrong_iteration(self):
        """Querying a valid node at an undumped iteration yields an empty tensor."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(
            net_name="Test", is_sync_mode=True)
        # non-existing tensor with wrong iteration number
        # (the ReluGradV2 tensor was dumped at iteration 1, not 0)
        info_nonexist = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                                     "gradReLU/ReluGradV2-op406",
                                     slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        tensor_data = debugger_backend.read_tensors([info_nonexist])
        assert len(tensor_data) == 1
        if self.GENERATE_GOLDEN:
            self.print_read_tensors([info_nonexist], tensor_data, 1, True)
        else:
            self.compare_expect_actual_result([info_nonexist], tensor_data, 1)

    @security_off_wrap
    def test_read_tensors_wrong_is_output(self):
        """Querying an input-dumped tensor as output yields an empty tensor."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(
            net_name="Test", is_sync_mode=True)
        # non-existing tensor with wrong is_output
        # (the tensor was dumped with is_output=False)
        info_nonexist = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                                     "gradReLU/ReluGradV2-op406",
                                     slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
        tensor_data = debugger_backend.read_tensors([info_nonexist])
        assert len(tensor_data) == 1
        if self.GENERATE_GOLDEN:
            self.print_read_tensors([info_nonexist], tensor_data, 2, True)
        else:
            self.compare_expect_actual_result([info_nonexist], tensor_data, 2)

    def compare_expect_actual_result(self, tensor_info_list, tensor_data_list, test_index):
        """Compare actual result with golden file.

        test_index is the 0-based offset of the first expected entry, so
        entry keys are "tensor_<test_index+position+1>".
        """
        golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
                                                    self.test_name + "_expected.json"))
        with open(golden_file) as f:
            expected_list = json.load(f)
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            tensor_id = "tensor_"+ str(test_index+x+1)
            info = expected_list[x+test_index][tensor_id]
            assert tensor_info.node_name == info['tensor_info']['node_name']
            assert tensor_info.slot == info['tensor_info']['slot']
            assert tensor_info.iteration == info['tensor_info']['iteration']
            assert tensor_info.rank_id == info['tensor_info']['rank_id']
            assert tensor_info.root_graph_id == info['tensor_info']['root_graph_id']
            assert tensor_info.is_output == info['tensor_info']['is_output']
            # Compare raw bytes, dtype-agnostic: golden "data" is a uint8 list.
            actual_data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            assert actual_data == info['tensor_data']['data']
            assert tensor_data.data_size == info['tensor_data']['size_in_bytes']
            assert tensor_data.dtype == info['tensor_data']['debugger_dtype']
            assert tensor_data.shape == info['tensor_data']['shape']

    def print_read_tensors(self, tensor_info_list, tensor_data_list, test_index, is_print):
        """Print read tensors result if GENERATE_GOLDEN is True.

        Appends one entry per tensor to cls.tensor_json; when is_print is
        True, dumps the whole accumulated list to <test_name>_expected.json.
        """
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            tensor = "tensor_" + str(test_index+x+1)
            data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            # Sanity check: C++-reported size must match the Python buffer.
            py_byte_size = len(tensor_data.data_ptr)
            c_byte_size = tensor_data.data_size
            if c_byte_size != py_byte_size:
                print("The python byte size of " + str(py_byte_size) +
                      " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            self.tensor_json.append({
                tensor: {
                    'tensor_info': {
                        'node_name': tensor_info.node_name,
                        'slot': tensor_info.slot,
                        'iteration': tensor_info.iteration,
                        'rank_id': tensor_info.rank_id,
                        'root_graph_id': tensor_info.root_graph_id,
                        'is_output': tensor_info.is_output
                    },
                    'tensor_data': {
                        'data': data,
                        'size_in_bytes': tensor_data.data_size,
                        'debugger_dtype': tensor_data.dtype,
                        'shape': tensor_data.shape
                    }
                }
            })
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -1,127 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors"
@security_off_wrap
def test_sync_trans_false_read_tensors():
# input tensor with zero slot
tensor1 = np.array([32.0, 4096.0], np.float32)
name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
# input tensor with non-zero slot
tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
name2 = "ReluGradV2.ReluGradV2-op406.0.0."
info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
# output tensor with zero slot
tensor3 = np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
[8.339e-05, 5.025e-05, 2.694e-05],
[8.565e-05, 5.156e-05, 2.658e-05]],
[[8.017e-05, 4.804e-05, 2.724e-05],
[8.392e-05, 5.126e-05, 2.843e-05],
[8.613e-05, 5.257e-05, 2.819e-05]],
[[7.617e-05, 3.827e-05, 5.305e-06],
[7.474e-05, 3.719e-05, 3.040e-06],
[7.081e-05, 3.338e-05, -2.086e-06]]], np.float32)
name3 = "Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0."
info3 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
"gradConv2D/Conv2DBackpropFilter-op424",
slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
# output tensor with non-zero slot
tensor4 = np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32)
name4 = "ReLUV2.ReLUV2-op381.0.0."
info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
# inf tensor
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
inf_name = "Reciprocal.Reciprocal-op3.0.0."
inf_info = d.TensorInfo(node_name="Default/Reciprocal-op3",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
tensor_name = [name1, name2, name3, name4]
tensor_list = [tensor1, tensor2, tensor3, tensor4]
tensor_info = [info1, info2, info3, info4]
temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_info)
inf_dir = build_dump_structure([inf_name], [inf_tensor], "Inf", [inf_info])
debugger_backend1 = d.DbgServices(dump_file_path=temp_dir)
_ = debugger_backend1.initialize(net_name="alexnet", is_sync_mode=True)
tensor_data = debugger_backend1.read_tensors(tensor_info)
debugger_backend2 = d.DbgServices(dump_file_path=inf_dir)
_ = debugger_backend2.initialize(net_name="Inf", is_sync_mode=True)
tensor_data_inf = debugger_backend2.read_tensors([inf_info])
tensor_info.extend([inf_info])
tensor_data.extend(tensor_data_inf)
shutil.rmtree(temp_dir)
shutil.rmtree(inf_dir)
print_read_tensors(tensor_info, tensor_data)
if not GENERATE_GOLDEN:
assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Dump the attributes of each read tensor to a golden/actual text file.

    Writes "<test_name>.expected" when GENERATE_GOLDEN is set, otherwise
    "<test_name>.actual" so the output can be diffed against the golden file.

    Args:
        tensor_info: list of d.TensorInfo describing the queried tensors.
        tensor_data: parallel list of tensor-data objects returned by
            DbgServices.read_tensors (exposes data_ptr/data_size/dtype/shape).
    """
    file_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # The with-block guarantees the file is closed even if an attribute access
    # or np.frombuffer raises; the original leaked the handle on exceptions.
    with open(file_name, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
            f_write.write("node name = " + tensor_info[x].node_name + "\n")
            f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
            f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
            f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
            f_write.write("root_graph_id = " +
                          str(tensor_info[x].root_graph_id) + "\n")
            f_write.write("is_output = " +
                          str(tensor_info[x].is_output) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x+1) + " attributes:\n")
            # Raw bytes are rendered as uint8 so the dump format is dtype-agnostic.
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
            py_byte_size = len(tensor_data[x].data_ptr)
            c_byte_size = tensor_data[x].data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " +
                          str(tensor_data[x].data_size) + "\n")
            f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
            f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
# Allow running this test standalone, outside of pytest.
if __name__ == "__main__":
    test_sync_trans_false_read_tensors()

View File

@ -15,82 +15,178 @@
""" """
Read tensor base and statistics test script for offline debugger APIs. Read tensor base and statistics test script for offline debugger APIs.
""" """
import os
import shutil import shutil
import json
import numpy as np import numpy as np
import mindspore.offline_debug.dbg_services as d import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors_base_stat"
class TestOfflineReadTensorBaseStat:
"""Test read tensor base stat for offline debugger"""
GENERATE_GOLDEN = False
test_name = "read_tensors_base_stat"
tensor_json = []
test_path = ''
@security_off_wrap @classmethod
def test_sync_read_tensors_base_stat(): def setup_class(cls):
"""Init setup for offline read tensor test"""
value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32)
name1 = "Add.Add-op4.0.0."
info1 = d.TensorInfo(node_name="Default/Add-op4",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32) inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf],
name1 = "Add.Add-op4.0.0." [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
info1 = d.TensorInfo(node_name="Default/Add-op4", name2 = "Reciprocal.Reciprocal-op3.0.0."
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) info2 = d.TensorInfo(node_name="Default/Reciprocal-op3",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32) nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)
name2 = "Reciprocal.Reciprocal-op3.0.0." name3 = "ReduceMean.ReduceMean-op92.0.0."
info2 = d.TensorInfo(node_name="Default/Reciprocal-op3", info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32) invalid_tensor = np.array([[1.1, -2.2], [3.3, -4.4]], np.float32)
name3 = "ReduceMean.ReduceMean-op92.0.0." name4 = "Add.Add-op1.0.0."
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92", info4 = d.TensorInfo(node_name="invalid_name_for_test",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
invalid_tensor = np.array([[1.1, -2.2], [3.3, -4.4]], np.float32) cls.tensor_info_1 = [info1]
name4 = "Add.Add-op1.0.0." cls.tensor_info_2 = [info2]
info4 = d.TensorInfo(node_name="invalid_name_for_test", cls.tensor_info_3 = [info3]
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) cls.tensor_info_4 = [info4]
cls.tensor_info = [info1, info2, info3, info4]
cls.test_path = build_dump_structure([name1, name2, name3, name4],
[value_tensor, inf_tensor, nan_tensor, invalid_tensor],
"Test", cls.tensor_info)
cls.debugger_backend = d.DbgServices(dump_file_path=cls.test_path, verbose=True)
_ = cls.debugger_backend.initialize(net_name="Test", is_sync_mode=True)
tensor_info = [info1, info2, info3, info4] @classmethod
test_path = build_dump_structure([name1, name2, name3, name4], def teardown_class(cls):
[value_tensor, inf_tensor, nan_tensor, invalid_tensor], shutil.rmtree(cls.test_path)
"Test", tensor_info)
debugger_backend = d.DbgServices( @security_off_wrap
dump_file_path=test_path, verbose=True) def test_read_value_tensors_base_stat(self):
tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_1)
tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_1)
_ = debugger_backend.initialize( if self.GENERATE_GOLDEN:
net_name="Test", is_sync_mode=True) self.print_read_tensors(self.tensor_info_1, tensor_base_data_list, tensor_stat_data_list, 0, False)
else:
self.compare_expect_actual_result(self.tensor_info_1, tensor_base_data_list, tensor_stat_data_list, 0)
tensor_base_data_list = debugger_backend.read_tensor_base(tensor_info) @security_off_wrap
tensor_stat_data_list = debugger_backend.read_tensor_stats(tensor_info) def test_read_inf_tensors_base_stat(self):
shutil.rmtree(test_path) tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_2)
print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list) tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_2)
if not GENERATE_GOLDEN:
assert compare_actual_with_expected(test_name)
if self.GENERATE_GOLDEN:
self.print_read_tensors(self.tensor_info_2, tensor_base_data_list, tensor_stat_data_list, 1, False)
else:
self.compare_expect_actual_result(self.tensor_info_2, tensor_base_data_list, tensor_stat_data_list, 1)
def print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list): @security_off_wrap
"""Print read tensors info.""" def test_read_nan_tensors_base_stat(self):
if GENERATE_GOLDEN: tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_3)
f_write = open(test_name + ".expected", "w") tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_3)
else:
f_write = open(test_name + ".actual", "w") if self.GENERATE_GOLDEN:
for x, (tensor_info_item, tensor_base, tensor_stat) in enumerate(zip(tensor_info, self.print_read_tensors(self.tensor_info_3, tensor_base_data_list, tensor_stat_data_list, 2, False)
tensor_base_data_list, else:
tensor_stat_data_list)): self.compare_expect_actual_result(self.tensor_info_3, tensor_base_data_list, tensor_stat_data_list, 2)
f_write.write(
"-----------------------------------------------------------\n") @security_off_wrap
f_write.write("tensor_info_" + str(x+1) + " attributes:\n") def test_read_inv_tensors_base_stat(self):
f_write.write("node name = " + tensor_info_item.node_name + "\n") tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_4)
f_write.write("slot = " + str(tensor_info_item.slot) + "\n") tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_4)
f_write.write("iteration = " + str(tensor_info_item.iteration) + "\n")
f_write.write("rank_id = " + str(tensor_info_item.rank_id) + "\n") if self.GENERATE_GOLDEN:
f_write.write("root_graph_id = " + self.print_read_tensors(self.tensor_info_4, tensor_base_data_list, tensor_stat_data_list, 3, True)
str(tensor_info_item.root_graph_id) + "\n") else:
f_write.write("is_output = " + self.compare_expect_actual_result(self.tensor_info_4, tensor_base_data_list, tensor_stat_data_list, 3)
str(tensor_info_item.is_output) + "\n")
f_write.write("\n") def compare_expect_actual_result(self, tensor_info, tensor_base_data_list, tensor_stat_data_list, test_index):
f_write.write("tensor_base_info:\n") """Compare actual result with golden file."""
f_write.write(str(tensor_base) + "\n") golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
f_write.write("\n") self.test_name + "_expected.json"))
f_write.write("tensor_stat_info:\n") with open(golden_file) as f:
f_write.write(str(tensor_stat) + '\n') expected_list = json.load(f)
f_write.close()
for x, (tensor_info_item, tensor_base, tensor_stat) in enumerate(zip(tensor_info,
tensor_base_data_list,
tensor_stat_data_list)):
test_id = "test"+ str(test_index+x+1)
info_json = expected_list[x+test_index][test_id]['tensor_info']
base_json = expected_list[x+test_index][test_id]['tensor_base_info']
stat_json = expected_list[x+test_index][test_id]['tensor_stat_info']
assert tensor_info_item.node_name == info_json['node_name']
assert tensor_info_item.slot == info_json['slot']
assert tensor_info_item.iteration == info_json['iteration']
assert tensor_info_item.rank_id == info_json['rank_id']
assert tensor_info_item.root_graph_id == info_json['root_graph_id']
assert tensor_info_item.is_output == info_json['is_output']
assert tensor_base.data_size == base_json['size_in_bytes']
assert tensor_base.dtype == base_json['debugger_dtype']
assert tensor_base.shape == base_json['shape']
assert tensor_stat.data_size == stat_json['size_in_bytes']
assert tensor_stat.dtype == stat_json['debugger_dtype']
assert tensor_stat.shape == stat_json['shape']
assert tensor_stat.is_bool == stat_json['is_bool']
assert tensor_stat.max_value == stat_json['max_vaue']
assert tensor_stat.min_value == stat_json['min_value']
assert tensor_stat.avg_value == stat_json['avg_value']
assert tensor_stat.count == stat_json['count']
assert tensor_stat.neg_zero_count == stat_json['neg_zero_count']
assert tensor_stat.pos_zero_count == stat_json['pos_zero_count']
assert tensor_stat.nan_count == stat_json['nan_count']
assert tensor_stat.neg_inf_count == stat_json['neg_inf_count']
assert tensor_stat.pos_inf_count == stat_json['pos_inf_count']
assert tensor_stat.zero_count == stat_json['zero_count']
    def print_read_tensors(self, tensor_info, tensor_base_data_list, tensor_stat_data_list, test_index, is_print):
        """Print read tensors info."""
        # Accumulates one "testN" record per tensor into self.tensor_json;
        # when is_print is True the accumulated list is dumped as the golden
        # "<test_name>_expected.json" file (indexing must match
        # compare_expect_actual_result).
        for x, (tensor_info_item, tensor_base, tensor_stat) in enumerate(zip(tensor_info,
                                                                             tensor_base_data_list,
                                                                             tensor_stat_data_list)):
            test_name = "test" + str(test_index+x+1)
            self.tensor_json.append({
                test_name: {
                    'tensor_info': {
                        'node_name': tensor_info_item.node_name,
                        'slot': tensor_info_item.slot,
                        'iteration': tensor_info_item.iteration,
                        'rank_id': tensor_info_item.rank_id,
                        'root_graph_id': tensor_info_item.root_graph_id,
                        'is_output': tensor_info_item.is_output
                    },
                    'tensor_base_info': {
                        'size_in_bytes': tensor_base.data_size,
                        'debugger_dtype': tensor_base.dtype,
                        'shape': tensor_base.shape
                    },
                    'tensor_stat_info': {
                        'size_in_bytes': tensor_stat.data_size,
                        'debugger_dtype': tensor_stat.dtype,
                        'shape': tensor_stat.shape,
                        'is_bool': tensor_stat.is_bool,
                        # 'max_vaue' (sic) matches the key stored in the golden
                        # JSON; keep the spelling in sync with the compare helper.
                        'max_vaue': tensor_stat.max_value,
                        'min_value': tensor_stat.min_value,
                        'avg_value': tensor_stat.avg_value,
                        'count': tensor_stat.count,
                        'neg_zero_count': tensor_stat.neg_zero_count,
                        'pos_zero_count': tensor_stat.pos_zero_count,
                        'nan_count': tensor_stat.nan_count,
                        'neg_inf_count': tensor_stat.neg_inf_count,
                        'pos_inf_count': tensor_stat.pos_inf_count,
                        'zero_count': tensor_stat.zero_count
                    }
                }})
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -1,107 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
# Set to True to regenerate the golden ".expected" file instead of comparing.
GENERATE_GOLDEN = False
# Base name for the ".expected"/".actual" output files of this test.
test_name = "sync_read_tensors_nonexist_node"
@security_off_wrap
def test_sync_trans_read_tensors_nonexist_node():
    """Query tensors missing from the dump (wrong op name / wrong iteration)
    and check that read_tensors still returns one entry per query."""
    tensor1 = np.array([32.0, 4096.0], np.float32)
    name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
    info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
    name2 = "ReluGradV2.ReluGradV2-op406.0.0."
    info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
                         slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
    # non-existing tensor with wrong op name
    info3 = d.TensorInfo(node_name="Default/CudnnUniformReal-op390",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    # non-existing tensor with wrong iteration number
    info4 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
                         slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    tensor_name = [name1, name2]
    tensor_create_info = [info1, info2]
    tensor_list = [tensor1, tensor2]
    # Build a dump tree containing only the two real tensors above.
    temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_create_info)
    # Read back only the two non-existing variants.
    tensor_check_info = [info3, info4]
    debugger_backend = d.DbgServices(dump_file_path=temp_dir)
    _ = debugger_backend.initialize(
        net_name="alexnet", is_sync_mode=True)
    tensor_data = debugger_backend.read_tensors(tensor_check_info)
    # Check the length of tensor list
    assert len(tensor_check_info) == 2
    assert len(tensor_data) == 2
    print_read_tensors(tensor_check_info, tensor_data)
    shutil.rmtree(temp_dir)
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Dump the attributes of each read tensor to a golden/actual text file.

    Writes "<test_name>.expected" when GENERATE_GOLDEN is set, otherwise
    "<test_name>.actual" so the output can be diffed against the golden file.

    Args:
        tensor_info: list of d.TensorInfo describing the queried tensors.
        tensor_data: parallel list of tensor-data objects returned by
            DbgServices.read_tensors (exposes data_ptr/data_size/dtype/shape).
    """
    file_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # The with-block guarantees the file is closed even if an attribute access
    # or np.frombuffer raises; the original leaked the handle on exceptions.
    with open(file_name, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
            f_write.write("node name = " + tensor_info[x].node_name + "\n")
            f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
            f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
            f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
            f_write.write("root_graph_id = " +
                          str(tensor_info[x].root_graph_id) + "\n")
            f_write.write("is_output = " +
                          str(tensor_info[x].is_output) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
            # Raw bytes are rendered as uint8 so the dump format is dtype-agnostic.
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
            py_byte_size = len(tensor_data[x].data_ptr)
            c_byte_size = tensor_data[x].data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " +
                          str(tensor_data[x].data_size) + "\n")
            f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
            f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
# Allow running this test standalone, outside of pytest.
if __name__ == "__main__":
    test_sync_trans_read_tensors_nonexist_node()

View File

@ -1,168 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
# Set to True to regenerate the golden ".expected" file instead of comparing.
GENERATE_GOLDEN = False
# Base name for the ".expected"/".actual" output files of this test.
test_name = "sync_watchpoints"
@security_off_wrap
def test_sync_trans_false_watchpoints():
    """Run four add/check/remove watchpoint scenarios against a synthetic sync
    dump and record the hits for golden-file comparison."""
    # NOTE(review): f_write is not closed if an exception fires before the
    # close() call near the end — consider a with-block.
    if GENERATE_GOLDEN:
        f_write = open(test_name + ".expected", "w")
    else:
        f_write = open(test_name + ".actual", "w")
    name1 = "Conv2D.Conv2D-op369.0.0."
    tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
                         [-1.3931e-02, 8.9359e-04, -1.1520e-02],
                         [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
                        [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
                         [-2.7866e-03, 2.5487e-04, 8.4782e-04],
                         [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
                        [[1.3914e-03, 6.0844e-04, 1.0643e-03],
                         [-2.0966e-02, -1.2865e-03, -1.8692e-03],
                         [-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
                         slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
    # fc2.bias is dumped at iterations 2 and 3 so test 4 can compare the change.
    name2 = "Parameter.fc2.bias.0.0."
    tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                        2.1177532e-07, 2.9952851e-06], np.float32)
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
                         slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
    tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                        -5.1546101e-07, 6.0798648e-06], np.float32)
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
                         slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)
    name3 = "Parameter.fc3.bias.0.0."
    tensor4 = np.array([2.2930422e-04, -3.6369250e-04, 7.1337068e-04, -1.9567949e-05], np.float32)
    info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias",
                         slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
    tensor_info = [info1, info2, info3, info4]
    tensor_name = [name1, name2, name2, name3]
    tensor_list = [tensor1, tensor2, tensor3, tensor4]
    temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_info)
    debugger_backend = d.DbgServices(dump_file_path=temp_dir)
    _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=True)
    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE
    # -> watch_condition=20 is NOT_CHANGE
    # test 1: watchpoint set and hit (watch_condition=6)
    param1 = d.Parameter(name="param", disabled=False, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/"
                                                         "Conv2D-op369":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                          }}, parameter_list=[param1])
    watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
    if len(watchpoint_hits_test_1) != 1:
        f_write.write(
            "ERROR -> test 1: watchpoint set but not hit just once\n")
    print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)
    # test 2: watchpoint remove and ensure it's not hit
    _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
    watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_2:
        f_write.write("ERROR -> test 2: watchpoint removed but hit\n")
    # test 3: watchpoint set and not hit, then remove
    param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/"
                                                         "Conv2D-op369":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                          }}, parameter_list=[param2])
    watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_3:
        f_write.write(
            "ERROR -> test 3: watchpoint set but not supposed to be hit\n")
    _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    # test 4: weight change watchpoint set and hit
    param_abs_mean_update_ratio_gt = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                         "Parameter[6]_11/fc2.bias":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": True
                                                          }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                              param_epsilon])
    watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
    if len(watchpoint_hits_test_4) != 1:
        f_write.write("ERROR -> test 4: watchpoint weight change set but not hit just once\n")
    print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)
    f_write.close()
    shutil.rmtree(temp_dir)
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
    """Write the attributes of every watchpoint hit, including each of its
    parameters, to the given file object."""
    for hit in watchpoint_hits:
        f_write.write(
            "-----------------------------------------------------------\n")
        f_write.write("watchpoint_hit for test_%u attributes:" %
                      test_id + "\n")
        f_write.write("name = " + hit.name + "\n")
        f_write.write("slot = " + str(hit.slot) + "\n")
        f_write.write("condition = " + str(hit.condition) + "\n")
        f_write.write("watchpoint_id = " + str(hit.watchpoint_id) + "\n")
        for index, parameter in enumerate(hit.parameters):
            prefix = "parameter " + str(index) + " "
            f_write.write(prefix + "name = " + parameter.name + "\n")
            f_write.write(prefix + "disabled = " + str(parameter.disabled) + "\n")
            f_write.write(prefix + "value = " + str(parameter.value) + "\n")
            f_write.write(prefix + "hit = " + str(parameter.hit) + "\n")
            f_write.write(prefix + "actual_value = " + str(parameter.actual_value) + "\n")
        f_write.write("error code = " + str(hit.error_code) + "\n")
        f_write.write("rank_id = " + str(hit.rank_id) + "\n")
        f_write.write("root_graph_id = " + str(hit.root_graph_id) + "\n")
# Allow running this test standalone, outside of pytest.
if __name__ == "__main__":
    test_sync_trans_false_watchpoints()

View File

@ -0,0 +1,238 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import os
import json
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap
class TestOfflineWatchpoints:
    """Test watchpoint for offline debugger."""
    # Set to True to regenerate the golden "_expected.json" file.
    GENERATE_GOLDEN = False
    # Base name of the golden file (looked up under ../data/dump/gpu_dumps/golden/).
    test_name = "watchpoints"
    # Accumulates hit records across tests when generating the golden file.
    watchpoint_hits_json = []
    # Dump directory created in setup_class and removed in teardown_class.
    temp_dir = ''
    @classmethod
    def setup_class(cls):
        """Init setup for offline watchpoints test"""
        # Conv2D activation containing negative values, so the MIN_LT(0.0)
        # watchpoint used by the tests can hit on it.
        name1 = "Conv2D.Conv2D-op369.0.0.1"
        tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
                             [-1.3931e-02, 8.9359e-04, -1.1520e-02],
                             [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
                            [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
                             [-2.7866e-03, 2.5487e-04, 8.4782e-04],
                             [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
                            [[1.3914e-03, 6.0844e-04, 1.0643e-03],
                             [-2.0966e-02, -1.2865e-03, -1.8692e-03],
                             [-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
        info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
                             slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
        # fc2.bias dumped at iterations 2 and 3 so the weight-change test can
        # compare the two values.
        name2 = "Parameter.fc2.bias.0.0.2"
        tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                            2.1177532e-07, 2.9952851e-06], np.float32)
        info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                             "Parameter[6]_11/fc2.bias",
                             slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
        tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                            -5.1546101e-07, 6.0798648e-06], np.float32)
        info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                             "Parameter[6]_11/fc2.bias",
                             slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)
        # All-negative tensor used by the second MIN_LT watchpoint in the tests.
        name3 = "CudnnUniformReal.CudnnUniformReal-op391.0.0.3"
        tensor4 = np.array([-32.0, -4096.0], np.float32)
        info4 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                             slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
        tensor_info = [info1, info2, info3, info4]
        tensor_name = [name1, name2, name2, name3]
        tensor_list = [tensor1, tensor2, tensor3, tensor4]
        # Shared dump tree for every test of this class; removed in teardown_class.
        cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)
    @classmethod
    def teardown_class(cls):
        # Remove the synthetic dump directory created in setup_class.
        shutil.rmtree(cls.temp_dir)
    @security_off_wrap
    def test_sync_add_remove_watchpoints_hit(self):
        """Two MIN_LT watchpoints hit in order; after removing one, only one hit remains."""
        # NOTES: watch_condition=6 is MIN_LT
        # watchpoint set and hit (watch_condition=6), then remove it
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        param = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        # add second watchpoint to check the watchpoint hit in correct order
        param1 = d.Parameter(name="param", disabled=False, value=10.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/CudnnUniformReal-op391":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param1])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test) == 2
        if self.GENERATE_GOLDEN:
            self.print_watchpoint_hits(watchpoint_hits_test, 0, False)
        else:
            self.compare_expect_actual_result(watchpoint_hits_test, 0)
        # after removing watchpoint 1, only one hit is expected on re-check
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test_1) == 1
    @security_off_wrap
    def test_sync_add_remove_watchpoints_not_hit(self):
        """A MIN_LT watchpoint with threshold -1000.0 produces no hit; then remove it."""
        # watchpoint set and not hit(watch_condition=6), then remove
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        param = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    @security_off_wrap
    def test_sync_weight_change_watchpoints_hit(self):
        """A CHANGE_TOO_LARGE watchpoint on fc2.bias hits exactly once at iteration 3."""
        # NOTES: watch_condition=18 is CHANGE_TOO_LARGE
        # weight change watchpoint set and hit(watch_condition=18)
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                             "Parameter[6]_11/fc2.bias":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": True
                                                              }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                                  param_epsilon])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=3)
        assert len(watchpoint_hits_test) == 1
        if self.GENERATE_GOLDEN:
            self.print_watchpoint_hits(watchpoint_hits_test, 2, True)
        else:
            self.compare_expect_actual_result(watchpoint_hits_test, 2)
    @security_off_wrap
    def test_async_add_remove_watchpoint_hit(self):
        """MIN_LT watchpoint hits once in async mode; no hit after removal."""
        # watchpoint set and hit(watch_condition=6) in async mode, then remove
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
        param = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test) == 1
        if not self.GENERATE_GOLDEN:
            # compares against golden entry 0, shared with the sync hit test
            self.compare_expect_actual_result(watchpoint_hits_test, 0)
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test_1
    @security_off_wrap
    def test_async_add_remove_watchpoints_not_hit(self):
        """A MIN_LT watchpoint with threshold -1000.0 produces no hit in async mode."""
        # watchpoint set and not hit(watch_condition=6) in async mode, then remove
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
        param = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    def compare_expect_actual_result(self, watchpoint_hits_list, test_index):
        """Compare actual result with golden file."""
        # test_index selects where this test's entries start inside the shared
        # golden JSON list (the goldens of all tests live in one file).
        golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
                                                    self.test_name + "_expected.json"))
        with open(golden_file) as f:
            expected_list = json.load(f)
        for x, watchpoint_hits in enumerate(watchpoint_hits_list):
            test_id = "watchpoint_hit" + str(test_index+x+1)
            info = expected_list[x+test_index][test_id]
            assert watchpoint_hits.name == info['name']
            assert watchpoint_hits.slot == info['slot']
            assert watchpoint_hits.condition == info['condition']
            assert watchpoint_hits.watchpoint_id == info['watchpoint_id']
            assert watchpoint_hits.error_code == info['error_code']
            assert watchpoint_hits.rank_id == info['rank_id']
            assert watchpoint_hits.root_graph_id == info['root_graph_id']
            # 'paremeter' (sic) is the key written by print_watchpoint_hits into
            # the golden JSON — do not "fix" the spelling here without also
            # updating that method and regenerating the golden file.
            for p, _ in enumerate(watchpoint_hits.parameters):
                parameter = "parameter" + str(p)
                assert watchpoint_hits.parameters[p].name == info['paremeter'][p][parameter]['name']
                assert watchpoint_hits.parameters[p].disabled == info['paremeter'][p][parameter]['disabled']
                assert watchpoint_hits.parameters[p].value == info['paremeter'][p][parameter]['value']
                assert watchpoint_hits.parameters[p].hit == info['paremeter'][p][parameter]['hit']
                assert watchpoint_hits.parameters[p].actual_value == info['paremeter'][p][parameter]['actual_value']
    def print_watchpoint_hits(self, watchpoint_hits_list, test_index, is_print):
        """Print watchpoint hits."""
        # Appends one "watchpoint_hitN" record per hit to cls.watchpoint_hits_json;
        # when is_print is True the accumulated list is dumped as the golden file.
        for x, watchpoint_hits in enumerate(watchpoint_hits_list):
            parameter_json = []
            for p, _ in enumerate(watchpoint_hits.parameters):
                parameter = "parameter" + str(p)
                parameter_json.append({
                    parameter: {
                        'name': watchpoint_hits.parameters[p].name,
                        'disabled': watchpoint_hits.parameters[p].disabled,
                        'value': watchpoint_hits.parameters[p].value,
                        'hit': watchpoint_hits.parameters[p].hit,
                        'actual_value': watchpoint_hits.parameters[p].actual_value
                    }
                })
            watchpoint_hit = "watchpoint_hit" + str(test_index+x+1)
            self.watchpoint_hits_json.append({
                watchpoint_hit: {
                    'name': watchpoint_hits.name,
                    'slot': watchpoint_hits.slot,
                    'condition': watchpoint_hits.condition,
                    'watchpoint_id': watchpoint_hits.watchpoint_id,
                    # 'paremeter' (sic) — compare_expect_actual_result reads this
                    # exact key; keep both in sync if ever renamed.
                    'paremeter': parameter_json,
                    'error_code': watchpoint_hits.error_code,
                    'rank_id': watchpoint_hits.rank_id,
                    'root_graph_id': watchpoint_hits.root_graph_id
                }
            })
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.watchpoint_hits_json, dump_f, indent=4, separators=(',', ': '))