forked from mindspore-Ecosystem/mindspore
!23949 Add st offline debugger test
Merge pull request !23949 from sabrinasun_59ee/sttest
This commit is contained in: commit e2b07117e1
@@ -16,15 +16,28 @@
Utils for testing offline debugger.
"""

import filecmp
import os
import tempfile
import numpy as np


def compare_actual_with_expected(test_name):
    """Compare actual file with expected."""
    pwd = os.getcwd()
    is_eq = filecmp.cmp(pwd + "/golden/" +
                        test_name + ".expected", test_name + ".actual", shallow=False)
    if os.path.exists(test_name + ".actual"):
        os.remove(test_name + ".actual")
    return is_eq


def build_dump_structure(path, tensor_name_list, tensor_list, net_name, tensor_info_list):
    """Build dump file structure from tensor_list."""
    temp_dir = tempfile.mkdtemp(prefix=net_name, dir=path)
    for tensor_name, tensor, tensor_info in zip(tensor_name_list, tensor_list, tensor_info_list):
        slot = str(tensor_info.slot)
        iteration = str(tensor_info.iteration)
        rank_id = str(tensor_info.rank_id)
        root_graph_id = str(tensor_info.root_graph_id)
        is_output = str(tensor_info.is_output)
        # Dump layout: <temp_dir>/rank_<rank_id>/<net_name>/<root_graph_id>/<iteration>/<tensor>.npy
        path = os.path.join(temp_dir, "rank_" + rank_id, net_name, root_graph_id, iteration)
        os.makedirs(path, exist_ok=True)
        if is_output == "True":
            file = tempfile.mkstemp(prefix=tensor_name, suffix=".output." + slot +
                                    ".DefaultFormat.npy", dir=path)
        else:
            file = tempfile.mkstemp(prefix=tensor_name, suffix=".input." + slot +
                                    ".DefaultFormat.npy", dir=path)
        full_path = file[1]
        np.save(full_path, tensor)
    return temp_dir
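For reference, a minimal usage sketch of these helpers (the tensor values, node name and net name are illustrative; it mirrors how the new tests later in this diff drive build_dump_structure and the offline debugger API):

    import os
    import tempfile

    import numpy as np
    import mindspore.offline_debug.dbg_services as d

    from dump_test_utils import build_dump_structure


    def sketch_read_back_one_tensor():
        # One fake tensor, recorded as the slot-0 input of a node.
        tensor = np.array([32.0, 4096.0], np.float32)
        name = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
        info = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                            slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)

        with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmp_dir:
            # Writes <dump_dir>/rank_0/Test/0/0/<name>*.input.0.DefaultFormat.npy
            dump_dir = build_dump_structure(tmp_dir, [name], [tensor], "Test", [info])
            backend = d.DbgServices(dump_file_path=dump_dir)
            backend.initialize(net_name="Test", is_sync_mode=True)
            return backend.read_tensors([info])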
@@ -0,0 +1,253 @@
[
    {
        "tensor_1": {
            "tensor_info": {
                "node_name": "Default/CudnnUniformReal-op391",
                "slot": 0, "iteration": 0, "rank_id": 0, "root_graph_id": 0, "is_output": false
            },
            "tensor_data": {
                "data": [0, 0, 0, 66, 0, 0, 128, 69],
                "size_in_bytes": 8, "debugger_dtype": 11, "shape": [2]
            }
        }
    },
    {
        "tensor_2": {
            "tensor_info": {
                "node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
                "slot": 1, "iteration": 1, "rank_id": 0, "root_graph_id": 0, "is_output": false
            },
            "tensor_data": {
                "data": [0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 128, 69, 0, 0, 144, 64, 195, 245, 216, 64, 0, 0, 48, 193],
                "size_in_bytes": 24, "debugger_dtype": 11, "shape": [2, 3]
            }
        }
    },
    {
        "tensor_3": {
            "tensor_info": {
                "node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424",
                "slot": 0, "iteration": 1, "rank_id": 0, "root_graph_id": 0, "is_output": true
            },
            "tensor_data": {
                "data": [8, 255, 166, 56, 189, 58, 71, 56, 103, 3, 217, 55, 170, 225, 174, 56,
                         135, 195, 82, 56, 54, 253, 225, 55, 254, 158, 179, 56, 33, 66, 88, 56,
                         30, 248, 222, 55, 241, 32, 168, 56, 143, 126, 73, 56, 116, 129, 228, 55,
                         53, 254, 175, 56, 2, 0, 87, 56, 246, 124, 238, 55, 177, 160, 180, 56,
                         156, 126, 92, 56, 144, 121, 236, 55, 117, 189, 159, 56, 25, 132, 32, 56,
                         154, 1, 178, 54, 187, 189, 156, 56, 117, 252, 27, 56, 205, 2, 76, 54,
                         212, 127, 148, 56, 129, 1, 12, 56, 53, 253, 11, 182],
                "size_in_bytes": 108, "debugger_dtype": 11, "shape": [3, 3, 3]
            }
        }
    },
    {
        "tensor_4": {
            "tensor_info": {
                "node_name": "Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
                "slot": 1, "iteration": 0, "rank_id": 0, "root_graph_id": 0, "is_output": true
            },
            "tensor_data": {
                "data": [104, 60, 33, 79, 53, 6, 131, 78, 78, 232, 126, 79, 154, 198, 85, 79,
                         245, 52, 84, 78, 70, 207, 222, 78],
                "size_in_bytes": 24, "debugger_dtype": 11, "shape": [6]
            }
        }
    }
]
@@ -1,28 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
slot = 0
iteration = 2
device_id = None
root_graph_id = 1
is_parameter = False

tensor_data_1 attributes:
data (printed in uint8) = [149 167 122 ... 160 212 164]
size in bytes = 2076672
debugger dtype = 10
shape = [32, 12, 13, 13, 16]
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/ReLUV2-op348
slot = 1
iteration = 2
device_id = None
root_graph_id = 1
is_parameter = False

tensor_data_2 attributes:
data (printed in uint8) = [ 20 21 18 ... 126 98 25]
size in bytes = 129792
debugger dtype = 6
shape = [32, 12, 13, 13, 2]
@@ -1,14 +0,0 @@
-----------------------------------------------------------
watchpoint_hit for test_1 attributes:
name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
slot = 0
condition = 6
watchpoint_id = 1
parameter 0 name = param
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = -0.1417236328125
error code = 0
device_id = 0
root_graph_id = 1
@@ -0,0 +1,55 @@
[
    {
        "watchpoint_hit1": {
            "name": "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
            "slot": 1,
            "condition": 6,
            "watchpoint_id": 1,
            "parameter": [
                {"parameter0": {"name": "param", "disabled": false, "value": 0.0,
                                "hit": true, "actual_value": -0.020966000854969025}}
            ],
            "error_code": 0,
            "rank_id": 0,
            "root_graph_id": 0
        }
    },
    {
        "watchpoint_hit2": {
            "name": "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
            "slot": 0,
            "condition": 18,
            "watchpoint_id": 3,
            "parameter": [
                {"parameter0": {"name": "abs_mean_update_ratio_gt", "disabled": false, "value": 0.0,
                                "hit": true, "actual_value": 1.0156775705209766}},
                {"parameter1": {"name": "epsilon", "disabled": true, "value": 0.0,
                                "hit": false, "actual_value": 0.0}}
            ],
            "error_code": 0,
            "rank_id": 0,
            "root_graph_id": 0
        }
    }
]
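A hedged sketch of how a test could check one WatchpointHit returned by check_watchpoints() against the golden file above (the helper name is illustrative, and the hit attribute names are assumed to mirror those used by print_watchpoint_hits later in this diff):

    import json
    import os


    def check_hit_against_golden(hit, index, test_name):
        # Load entry "watchpoint_hit<index+1>" from golden/<test_name>_expected.json.
        golden_file = os.path.join(os.getcwd(), "golden", test_name + "_expected.json")
        with open(golden_file) as f:
            expected = json.load(f)[index]["watchpoint_hit" + str(index + 1)]
        assert hit.name == expected["name"]
        assert hit.slot == expected["slot"]
        assert hit.condition == expected["condition"]
        assert hit.watchpoint_id == expected["watchpoint_id"]
        assert hit.error_code == expected["error_code"]
        assert hit.root_graph_id == expected["root_graph_id"]
        for p, param in enumerate(hit.parameters):
            expected_param = expected["parameter"][p]["parameter" + str(p)]
            assert param.name == expected_param["name"]
            assert param.disabled == expected_param["disabled"]
            assert param.value == expected_param["value"]
            assert param.hit == expected_param["hit"]
            assert param.actual_value == expected_param["actual_value"]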
@@ -0,0 +1,253 @@
[
    {
        "tensor_1": {
            "tensor_info": {
                "node_name": "Default/CudnnUniformReal-op391",
                "slot": 0, "iteration": 0, "rank_id": 0, "root_graph_id": 0, "is_output": false
            },
            "tensor_data": {
                "data": [0, 0, 0, 66, 0, 0, 128, 69],
                "size_in_bytes": 8, "debugger_dtype": 11, "shape": [2]
            }
        }
    },
    {
        "tensor_2": {
            "tensor_info": {
                "node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
                "slot": 1, "iteration": 1, "rank_id": 0, "root_graph_id": 0, "is_output": false
            },
            "tensor_data": {
                "data": [0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 128, 69, 0, 0, 144, 64, 195, 245, 216, 64, 0, 0, 48, 193],
                "size_in_bytes": 24, "debugger_dtype": 11, "shape": [2, 3]
            }
        }
    },
    {
        "tensor_3": {
            "tensor_info": {
                "node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424",
                "slot": 0, "iteration": 1, "rank_id": 0, "root_graph_id": 0, "is_output": true
            },
            "tensor_data": {
                "data": [8, 255, 166, 56, 189, 58, 71, 56, 103, 3, 217, 55, 170, 225, 174, 56,
                         135, 195, 82, 56, 54, 253, 225, 55, 254, 158, 179, 56, 33, 66, 88, 56,
                         30, 248, 222, 55, 241, 32, 168, 56, 143, 126, 73, 56, 116, 129, 228, 55,
                         53, 254, 175, 56, 2, 0, 87, 56, 246, 124, 238, 55, 177, 160, 180, 56,
                         156, 126, 92, 56, 144, 121, 236, 55, 117, 189, 159, 56, 25, 132, 32, 56,
                         154, 1, 178, 54, 187, 189, 156, 56, 117, 252, 27, 56, 205, 2, 76, 54,
                         212, 127, 148, 56, 129, 1, 12, 56, 53, 253, 11, 182],
                "size_in_bytes": 108, "debugger_dtype": 11, "shape": [3, 3, 3]
            }
        }
    },
    {
        "tensor_4": {
            "tensor_info": {
                "node_name": "Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
                "slot": 1, "iteration": 0, "rank_id": 0, "root_graph_id": 0, "is_output": true
            },
            "tensor_data": {
                "data": [104, 60, 33, 79, 53, 6, 131, 78, 78, 232, 126, 79, 154, 198, 85, 79,
                         245, 52, 84, 78, 70, 207, 222, 78],
                "size_in_bytes": 24, "debugger_dtype": 11, "shape": [6]
            }
        }
    }
]
@@ -1,73 +0,0 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [170 19 44 181 254 212 16 52 52 162 148 180 130 115 226 180 183 243
|
||||
101 52 224 79 189 51 10 70 69 51 199 75 159 52 79 98 104 52
|
||||
106 77 19 52 129 183 8 180 252 58 48 180 35 219 9 52 240 201
|
||||
179 51 142 151 158 51 210 145 182 53 140 219 0 53 140 219 22 181
|
||||
46 33 87 180 238 90 122 180 166 10 38 179 202 195 4 53 166 10
|
||||
150 51 214 120 209 52 235 115 37 180 92 177 215 180 0 136 84 51
|
||||
72 114 145 180 43 169 255 180 114 27 61 52 76 225 122 50 126 72
|
||||
159 51 58 35 202 51 114 61 106 51 60 223 63 52 209 179 1 52
|
||||
232 217 44 178 130 158 109 179 213 231 10 179 37 40 94 179 208 68
|
||||
64 53 6 52 249 52 162 35 1 181 231 29 155 52 30 201 69 180
|
||||
229 131 126 51 18 165 109 180 164 112 163 181 116 172 11 178 6 129
|
||||
37 52 54 205 203 180 115 104 145 52 232 106 219 179 36 40 214 52
|
||||
202 50 204 52 76 89 38 179 230 140 232 178 168 53 77 52 180 191
|
||||
108 51 128 183 64 51 56 137 161 180 247 6 143 180 126 63 197 180
|
||||
198 177 94 52 140 185 139 51 150 178 228 180 255 67 150 52 134 201
|
||||
164 52 107 43 14 53 174 216 63 179 40 160 41 53 120 88 72 179
|
||||
218 172 234 52 234 38 25 52 85 159 155 180 254 67 138 180 34 253
|
||||
118 180 218 61 17 52 242 133 253 52 175 37 180 52 171 62 163 52
|
||||
202 195 86 53 160 171 45 52 34 31 176 180 156 85 5 53 178 191
|
||||
68 180 42 203 140 52 248 117 72 52 248 253 212 176 195 100 202 51
|
||||
87 14 141 52 91 100 235 51 48 221 136 52 143 117 17 180 51 196
|
||||
25 52 127 29 112 180 152 144 207 178 219 104 64 52 21 174 251 52
|
||||
164 78 138 181 20 63 6 52 10 249 96 179 163 146 18 53 200 186
|
||||
236 52 2 188 85 52 124 140 121 179 246 185 22 181 246 74 249 51
|
||||
70 182 135 53 189 227 76 52 249 160 159 180 134 235 65 53 64 164
|
||||
255 51 224 156 41 53 142 117 69 181 247 151 101 53 185 175 35 52
|
||||
164 112 21 53 30 31 212 179 142 151 110 179 176 148 29 181 206 204
|
||||
88 53 116 215 214 180 172 173 216 51 106 222 153 180 200 152 19 181
|
||||
176 3 7 52 215 52 87 52]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [181 167 46 26 122 155 141 164 212 39 111 27 247 156 1 152 189 36
|
||||
15 161 254 167 82 163 33 42 101 158 225 161 24 167 103 140 45 42
|
||||
178 170 173 29 48 42 39 32 56 25 216 170 128 41 216 23 153 154
|
||||
39 173 193 42 84 160 111 22 61 144]
|
||||
size in bytes = 64
|
||||
debugger dtype = 10
|
||||
shape = [2, 2, 2, 2, 2]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 50 17 122 ... 94 42 90]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
|
@@ -1,33 +0,0 @@
-----------------------------------------------------------
watchpoint_hit for test_1 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
slot = 0
condition = 6
watchpoint_id = 1
parameter 0 name = param
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = -0.08050537109375
error code = 0
device_id = 0
root_graph_id = 0
-----------------------------------------------------------
watchpoint_hit for test_4 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
slot = 0
condition = 18
watchpoint_id = 3
parameter 0 name = abs_mean_update_ratio_gt
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = 0.5243796973599475
parameter 1 name = epsilon
parameter 1 disabled = True
parameter 1 value = 0.0
parameter 1 hit = False
parameter 1 actual_value = 0.0
error code = 0
device_id = 0
root_graph_id = 0
@@ -1,98 +0,0 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [230 208 10 52 104 34 252 52 4 231 144 52 188 150 64 180 88 236
|
||||
15 180 254 135 180 51 131 226 147 52 88 202 62 53 2 43 55 53
|
||||
231 29 87 180 220 249 30 180 157 17 177 180 81 107 140 181 8 95
|
||||
192 180 89 134 112 180 96 238 90 178 156 196 212 180 206 25 15 181
|
||||
212 154 6 180 91 211 116 52 191 14 140 51 128 106 124 53 28 158
|
||||
70 181 182 21 251 50 100 204 157 179 88 202 42 180 7 95 8 53
|
||||
128 251 238 52 241 133 241 52 111 86 157 179 48 221 148 180 200 7
|
||||
141 180 236 226 182 51 190 82 158 180 140 108 179 180 195 134 215 179
|
||||
103 213 39 179 89 168 149 180 42 58 58 180 64 53 62 179 250 126
|
||||
158 52 38 83 117 52 0 0 136 180 136 133 122 51 110 18 131 179
|
||||
238 13 94 51 102 136 15 181 134 90 227 180 16 11 117 180 35 74
|
||||
163 52 105 0 87 181 112 18 131 50 226 233 67 181 217 172 10 52
|
||||
206 25 217 52 208 213 22 52 146 203 87 180 74 46 207 52 178 191
|
||||
4 180 100 93 216 52 119 190 171 180 223 2 5 181 128 72 207 179
|
||||
58 146 11 179 224 79 137 52 143 228 154 180 246 219 215 179 14 79
|
||||
195 52 126 29 64 52 132 192 42 51 94 220 86 52 94 109 1 181
|
||||
72 37 117 178 110 197 94 180 160 94 153 179 118 224 80 181 156 17
|
||||
37 50 120 156 162 53 26 115 135 180 228 20 29 53 145 126 147 52
|
||||
99 16 48 180 211 188 199 180 52 51 99 180 93 254 227 52 152 126
|
||||
123 49 6 18 16 181 5 163 130 51 27 158 98 53 134 235 189 52
|
||||
119 45 9 180 130 115 110 52 158 128 162 52 232 251 197 180 178 46
|
||||
158 179 57 214 157 52 172 207 161 180 208 0 222 49 242 99 32 53
|
||||
20 174 135 50 247 117 176 52 194 57 43 180 140 108 135 51 243 65
|
||||
175 51 187 73 156 51 63 232 217 50 180 234 115 52 194 168 148 52
|
||||
27 192 183 180 45 178 157 52 125 208 17 53 236 192 65 53 190 193
|
||||
7 53 254 246 57 53 3 43 199 51 64 164 215 180 220 104 240 51
|
||||
23 72 24 180 68 173 9 51 72 114 29 53 105 0 57 181 188 150
|
||||
8 53 229 97 131 53 0 34 189 51 163 146 74 53 31 244 204 51
|
||||
86 193 220 180 156 51 146 179]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 99 26 69 41 190 38 128 38 232 38 16 39 5 39 24 39 1 39
|
||||
218 38 219 38 43 39 241 33 21 165 159 32 15 145 191 28 66 30
|
||||
110 30 149 31 14 29 179 29 249 28 94 29 141 156 210 36 143 166
|
||||
201 162 5 165 54 166 100 165 57 165 81 165 25 166 150 165 236 164
|
||||
20 164 238 165 170 20 200 168 16 168 36 169 9 169 195 168 64 168
|
||||
248 168 10 169 20 168 56 167 137 167 124 168 221 152 35 168 163 167
|
||||
110 169 147 168 198 167 52 168 91 168 14 168 30 168 240 167 171 168
|
||||
235 168 37 161 222 165 16 161 88 164 68 162 156 152 109 151 181 156
|
||||
0 152 84 158 112 154 193 161 13 162 172 28 38 163 16 31 255 26
|
||||
102 21 64 31 177 28 102 156 77 20 62 25 177 26 26 22 241 24
|
||||
188 33 149 160 67 36 171 35 38 36 68 34 148 19 54 162 53 161
|
||||
174 156 195 134 139 24 210 35 175 36 206 158 136 37 88 36 31 36
|
||||
78 20 203 159 6 165 235 163 83 162 7 157 76 31 240 35 38 37
|
||||
20 160 193 38 130 29 95 23 177 161 143 162 46 165 103 164 106 163
|
||||
167 162 36 158 130 161 149 33 171 157 138 37 252 27 198 164 116 166
|
||||
60 165 36 165 47 165 150 166 188 166 112 167 58 166 33 140 141 163
|
||||
93 32 38 159 13 168 194 166 78 166 8 166 201 165 115 166 128 166
|
||||
77 166 29 166 131 157 150 31 46 32 124 164 239 166 219 165 96 166
|
||||
216 166 21 167 28 167 35 167 237 165 202 164 57 32 75 26 208 40
|
||||
148 40 205 40 162 40 187 40 181 40 181 40 155 40 124 40 129 40
|
||||
157 40 186 29 253 32 138 44 226 43 43 43 237 42 164 42 137 42
|
||||
174 42 179 42 160 42 104 42 30 42 53 38 140 25 240 44 120 44
|
||||
236 42 19 43 143 42 6 42 181 41 83 42 0 43 112 42 97 41
|
||||
27 32 177 32 254 44 105 43 242 40 239 40 71 41 223 40 237 40
|
||||
93 41 22 41 211 40 227 40 187 20 71 30 4 44 188 40 79 36
|
||||
133 38 62 39 209 38 15 38 83 38 136 38 146 38 100 37 118 152
|
||||
185 149 165 42 99 41 61 36 241 37 34 38 170 38 62 38 69 39
|
||||
215 39 128 39 49 38 54 33 141 161 184 41 34 40 100 36 230 37
|
||||
133 38 57 37 224 35 7 37]
|
||||
size in bytes = 512
|
||||
debugger dtype = 10
|
||||
shape = [4, 4, 4, 4]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [19 17 27 ... 94 42 90]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
|
@@ -0,0 +1,55 @@
[
    {
        "watchpoint_hit1": {
            "name": "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
            "slot": 1,
            "condition": 6,
            "watchpoint_id": 1,
            "parameter": [
                {"parameter0": {"name": "param", "disabled": false, "value": 0.0,
                                "hit": true, "actual_value": -0.020966000854969025}}
            ],
            "error_code": 0,
            "rank_id": 0,
            "root_graph_id": 0
        }
    },
    {
        "watchpoint_hit2": {
            "name": "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
            "slot": 0,
            "condition": 18,
            "watchpoint_id": 3,
            "parameter": [
                {"parameter0": {"name": "abs_mean_update_ratio_gt", "disabled": false, "value": 0.0,
                                "hit": true, "actual_value": 1.0156775705209766}},
                {"parameter1": {"name": "epsilon", "disabled": true, "value": 0.0,
                                "hit": false, "actual_value": 0.0}}
            ],
            "error_code": 0,
            "rank_id": 0,
            "root_graph_id": 0
        }
    }
]
@@ -1,86 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
import pytest
|
||||
from dump_test_utils import compare_actual_with_expected
|
||||
from tests.security_utils import security_off_wrap
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "async_sink_mode_true_read_tensors"
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
@pytest.mark.skip(reason="needs updating")
|
||||
@security_off_wrap
|
||||
def test_async_sink_mode_true_read_tensors():
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="/home/workspace/mindspore_dataset/dumps/async_sink_true/")
|
||||
|
||||
_ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False)
|
||||
|
||||
# output tensor with zero slot
|
||||
info1 = d.TensorInfo(node_name="Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
|
||||
"conv3-Conv2d/Conv2D-op169",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=1, is_parameter=False)
|
||||
# output tensor with non-zero slot
|
||||
info2 = d.TensorInfo(node_name="Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
|
||||
"ReLUV2-op348",
|
||||
slot=1, iteration=2, device_id=0, root_graph_id=1, is_parameter=False)
|
||||
|
||||
tensor_info = [info1, info2]
|
||||
|
||||
tensor_data = debugger_backend.read_tensors(tensor_info)
|
||||
|
||||
print_read_tensors(tensor_info, tensor_data)
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
|
||||
"""Print read tensors."""
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
for x, _ in enumerate(tensor_info):
|
||||
f_write.write("-----------------------------------------------------------\n")
|
||||
f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("node name = " + tensor_info[x].node_name + "\n")
|
||||
f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
|
||||
f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
|
||||
f_write.write("device_id = " + str(tensor_info[x].device_id) + "\n")
|
||||
f_write.write("root_graph_id = " + str(tensor_info[x].root_graph_id) + "\n")
|
||||
f_write.write("is_parameter = " + str(tensor_info[x].is_parameter) + "\n")
|
||||
f_write.write("\n")
|
||||
f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("data (printed in uint8) = " + str(np.frombuffer(
|
||||
tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
|
||||
py_byte_size = len(tensor_data[x].data_ptr)
|
||||
c_byte_size = tensor_data[x].data_size
|
||||
if c_byte_size != py_byte_size:
|
||||
f_write.write("The python byte size of " + str(py_byte_size) +
|
||||
" does not match the C++ byte size of " + str(c_byte_size) + "\n")
|
||||
f_write.write("size in bytes = " + str(tensor_data[x].data_size) + "\n")
|
||||
f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
|
||||
f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
|
||||
f_write.close()
|
|
@@ -1,109 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import pytest
|
||||
from dump_test_utils import compare_actual_with_expected
|
||||
from tests.security_utils import security_off_wrap
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "async_sink_mode_true_watchpoints"
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
@pytest.mark.skip(reason="needs updating")
|
||||
@security_off_wrap
|
||||
def test_async_sink_mode_true_watchpoints():
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="/home/workspace/mindspore_dataset/dumps/async_sink_true/")
|
||||
|
||||
_ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False)
|
||||
|
||||
# NOTES:
|
||||
# -> watch_condition=6 is MIN_LT
|
||||
# -> watch_condition=18 is CHANGE_TOO_LARGE
|
||||
|
||||
# test 1: watchpoint set and hit (watch_condition=6)
|
||||
param1 = d.Parameter(name="param", disabled=False, value=0.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
|
||||
check_node_list={"Default/network-TrainOneStepCell/network-WithLossCell/"
|
||||
"_backbone-AlexNet/conv3-Conv2d/Conv2D-op169":
|
||||
{"device_id": [0], "root_graph_id": [1],
|
||||
"is_parameter": False
|
||||
}}, parameter_list=[param1])
|
||||
|
||||
watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
|
||||
if len(watchpoint_hits_test_1) != 1:
|
||||
f_write.write("ERROR -> test 1: watchpoint set but not hit just once\n")
|
||||
print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)
|
||||
|
||||
# test 2: watchpoint remove and ensure it's not hit
|
||||
_ = debugger_backend.remove_watchpoint(watchpoint_id=1)
|
||||
watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
|
||||
if watchpoint_hits_test_2:
|
||||
f_write.write("ERROR -> test 2: watchpoint removed but hit\n")
|
||||
|
||||
# test 3: watchpoint set and not hit, then remove
|
||||
param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
|
||||
check_node_list={"Default/network-TrainOneStepCell/network-WithLossCell/"
|
||||
"_backbone-AlexNet/conv3-Conv2d/Conv2D-op169":
|
||||
{"device_id": [0], "root_graph_id": [1],
|
||||
"is_parameter": False
|
||||
}}, parameter_list=[param2])
|
||||
|
||||
watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
|
||||
if watchpoint_hits_test_3:
|
||||
f_write.write("ERROR -> test 3: watchpoint set but not supposed to be hit\n")
|
||||
_ = debugger_backend.remove_watchpoint(watchpoint_id=2)
|
||||
f_write.close()
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
|
||||
"""Print watchpoint hits."""
|
||||
for x, _ in enumerate(watchpoint_hits):
|
||||
f_write.write("-----------------------------------------------------------\n")
|
||||
f_write.write("watchpoint_hit for test_%u attributes:" % test_id + "\n")
|
||||
f_write.write("name = " + watchpoint_hits[x].name + "\n")
|
||||
f_write.write("slot = " + str(watchpoint_hits[x].slot) + "\n")
|
||||
f_write.write("condition = " + str(watchpoint_hits[x].condition) + "\n")
|
||||
f_write.write("watchpoint_id = " + str(watchpoint_hits[x].watchpoint_id) + "\n")
|
||||
for p, _ in enumerate(watchpoint_hits[x].parameters):
|
||||
f_write.write("parameter " + str(p) + " name = " +
|
||||
watchpoint_hits[x].parameters[p].name + "\n")
|
||||
f_write.write("parameter " + str(p) + " disabled = " +
|
||||
str(watchpoint_hits[x].parameters[p].disabled) + "\n")
|
||||
f_write.write("parameter " + str(p) + " value = " +
|
||||
str(watchpoint_hits[x].parameters[p].value) + "\n")
|
||||
f_write.write("parameter " + str(p) + " hit = " +
|
||||
str(watchpoint_hits[x].parameters[p].hit) + "\n")
|
||||
f_write.write("parameter " + str(p) + " actual_value = " +
|
||||
str(watchpoint_hits[x].parameters[p].actual_value) + "\n")
|
||||
f_write.write("error code = " + str(watchpoint_hits[x].error_code) + "\n")
|
||||
f_write.write("device_id = " + str(watchpoint_hits[x].device_id) + "\n")
|
||||
f_write.write("root_graph_id = " + str(watchpoint_hits[x].root_graph_id) + "\n")
|
|
@@ -0,0 +1,159 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""

import os
import json
import tempfile
import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from tests.security_utils import security_off_wrap
from dump_test_utils import build_dump_structure

GENERATE_GOLDEN = False
tensor_json = []


def run_read_tensors(is_sync):
    if is_sync:
        test_name = "sync_read_tensors"
    else:
        test_name = "async_read_tensors"

    # input tensor with zero slot
    tensor1 = np.array([32.0, 4096.0], np.float32)
    name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
    info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    # input tensor with non-zero slot
    tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
    name2 = "ReluGradV2.ReluGradV2-op406.0.0."
    info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                         "gradReLU/ReluGradV2-op406",
                         slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
    # output tensor with zero slot
    tensor3 = np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
                         [8.339e-05, 5.025e-05, 2.694e-05],
                         [8.565e-05, 5.156e-05, 2.658e-05]],
                        [[8.017e-05, 4.804e-05, 2.724e-05],
                         [8.392e-05, 5.126e-05, 2.843e-05],
                         [8.613e-05, 5.257e-05, 2.819e-05]],
                        [[7.617e-05, 3.827e-05, 5.305e-06],
                         [7.474e-05, 3.719e-05, 3.040e-06],
                         [7.081e-05, 3.338e-05, -2.086e-06]]], np.float32)
    name3 = "Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0."
    info3 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
                         "gradConv2D/Conv2DBackpropFilter-op424",
                         slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
    # output tensor with non-zero slot
    tensor4 = np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32)
    name4 = "ReLUV2.ReLUV2-op381.0.0."
    info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
                         slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)

    tensor_name = [name1, name2, name3, name4]
    tensor_list = [tensor1, tensor2, tensor3, tensor4]
    tensor_info = [info1, info2, info3, info4]

    pwd = os.getcwd()
    with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir:
        temp_dir = build_dump_structure(tmp_dir, tensor_name, tensor_list, "Test", tensor_info)

        debugger_backend = d.DbgServices(dump_file_path=temp_dir)
        debugger_backend.initialize(net_name="Test", is_sync_mode=is_sync)
        tensor_data = debugger_backend.read_tensors(tensor_info)

        if GENERATE_GOLDEN:
            print_read_tensors(tensor_info, tensor_data, 0, True, test_name)
        else:
            compare_expect_actual_result(tensor_info, tensor_data, 0, test_name)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_sync_read_tensors():
    run_read_tensors(True)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_read_tensors():
    run_read_tensors(False)


def compare_expect_actual_result(tensor_info_list, tensor_data_list, test_index, test_name):
    """Compare actual result with golden file."""
    pwd = os.getcwd()
    golden_file = os.path.realpath(os.path.join(pwd, "golden", test_name + "_expected.json"))
    with open(golden_file) as f:
        expected_list = json.load(f)
    for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
        test_id = "tensor_" + str(test_index + x + 1)
        info = expected_list[x + test_index][test_id]
        assert tensor_info.node_name == info['tensor_info']['node_name']
        assert tensor_info.slot == info['tensor_info']['slot']
        assert tensor_info.iteration == info['tensor_info']['iteration']
        assert tensor_info.rank_id == info['tensor_info']['rank_id']
        assert tensor_info.root_graph_id == info['tensor_info']['root_graph_id']
        assert tensor_info.is_output == info['tensor_info']['is_output']
        actual_data = np.frombuffer(
            tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
        assert actual_data == info['tensor_data']['data']
        assert tensor_data.data_size == info['tensor_data']['size_in_bytes']
        assert tensor_data.dtype == info['tensor_data']['debugger_dtype']
        assert tensor_data.shape == info['tensor_data']['shape']


def print_read_tensors(tensor_info_list, tensor_data_list, test_index, is_print, test_name):
    """Print read tensors result if GENERATE_GOLDEN is True."""
    for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
        tensor = "tensor_" + str(test_index + x + 1)
        data = np.frombuffer(
            tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
        py_byte_size = len(tensor_data.data_ptr)
        c_byte_size = tensor_data.data_size
        if c_byte_size != py_byte_size:
            print("The python byte size of " + str(py_byte_size) +
                  " does not match the C++ byte size of " + str(c_byte_size) + "\n")
        tensor_json.append({
            tensor: {
                'tensor_info': {
                    'node_name': tensor_info.node_name,
                    'slot': tensor_info.slot,
                    'iteration': tensor_info.iteration,
                    'rank_id': tensor_info.rank_id,
                    'root_graph_id': tensor_info.root_graph_id,
                    'is_output': tensor_info.is_output
                },
                'tensor_data': {
                    'data': data,
                    'size_in_bytes': tensor_data.data_size,
                    'debugger_dtype': tensor_data.dtype,
                    'shape': tensor_data.shape
                }
            }
        })
    if is_print:
        with open(test_name + "_expected.json", "w") as dump_f:
            json.dump(tensor_json, dump_f, indent=4, separators=(',', ': '))
@@ -1,89 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
import pytest
|
||||
from dump_test_utils import compare_actual_with_expected
|
||||
from tests.security_utils import security_off_wrap
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_false_read_tensors"
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
@pytest.mark.skip(reason="needs updating")
|
||||
@security_off_wrap
|
||||
def test_sync_trans_false_read_tensors():
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="/home/workspace/mindspore_dataset/dumps/sync_trans_false/alexnet/")
|
||||
|
||||
_ = debugger_backend.initialize(
|
||||
net_name="Network Name goes here!", is_sync_mode=True)
|
||||
|
||||
# parameter
|
||||
info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
|
||||
# output tensor with zero slot
|
||||
info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
# output tensor with non-zero slot
|
||||
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346",
|
||||
slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
|
||||
tensor_info = [info1, info2, info3]
|
||||
|
||||
tensor_data = debugger_backend.read_tensors(tensor_info)
|
||||
|
||||
print_read_tensors(tensor_info, tensor_data)
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
|
||||
"""Print read tensors."""
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
for x, _ in enumerate(tensor_info):
|
||||
f_write.write("-----------------------------------------------------------\n")
|
||||
f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("node name = " + tensor_info[x].node_name + "\n")
|
||||
f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
|
||||
f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
|
||||
f_write.write("device_id = " + str(tensor_info[x].device_id) + "\n")
|
||||
f_write.write("root_graph_id = " + str(tensor_info[x].root_graph_id) + "\n")
|
||||
f_write.write("is_parameter = " + str(tensor_info[x].is_parameter) + "\n")
|
||||
f_write.write("\n")
|
||||
f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("data (printed in uint8) = " + str(np.frombuffer(
|
||||
tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
|
||||
py_byte_size = len(tensor_data[x].data_ptr)
|
||||
c_byte_size = tensor_data[x].data_size
|
||||
if c_byte_size != py_byte_size:
|
||||
f_write.write("The python byte size of " + str(py_byte_size) +
|
||||
" does not match the C++ byte size of " + str(c_byte_size) + "\n")
|
||||
f_write.write("size in bytes = " + str(tensor_data[x].data_size) + "\n")
|
||||
f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
|
||||
f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
|
||||
f_write.close()
|
|
@@ -1,128 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import pytest
|
||||
from dump_test_utils import compare_actual_with_expected
|
||||
from tests.security_utils import security_off_wrap
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_false_watchpoints"
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
@pytest.mark.skip(reason="needs updating")
|
||||
@security_off_wrap
|
||||
def test_sync_trans_false_watchpoints():
|
||||
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="/home/workspace/mindspore_dataset/dumps/sync_trans_false/alexnet/")
|
||||
|
||||
_ = debugger_backend.initialize(
|
||||
net_name="Network Name goes here!", is_sync_mode=True)
|
||||
|
||||
# NOTES:
|
||||
# -> watch_condition=6 is MIN_LT
|
||||
# -> watch_condition=18 is CHANGE_TOO_LARGE
|
||||
|
||||
# test 1: watchpoint set and hit (watch_condition=6)
|
||||
param1 = d.Parameter(name="param", disabled=False, value=0.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
|
||||
"Conv2D-op168":
|
||||
{"device_id": [0], "root_graph_id": [0],
|
||||
"is_parameter": False
|
||||
}}, parameter_list=[param1])
|
||||
|
||||
watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
|
||||
if len(watchpoint_hits_test_1) != 1:
|
||||
f_write.write("ERROR -> test 1: watchpoint set but not hit just once")
|
||||
print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)
|
||||
|
||||
# test 2: watchpoint remove and ensure it's not hit
|
||||
_ = debugger_backend.remove_watchpoint(watchpoint_id=1)
|
||||
watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
|
||||
if watchpoint_hits_test_2:
|
||||
f_write.write("ERROR -> test 2: watchpoint removed but hit")
|
||||
|
||||
# test 3: watchpoint set and not hit, then remove
|
||||
param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
|
||||
"Conv2D-op308":
|
||||
{"device_id": [0], "root_graph_id": [0],
|
||||
"is_parameter": False
|
||||
}}, parameter_list=[param2])
|
||||
|
||||
watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
|
||||
if watchpoint_hits_test_3:
|
||||
f_write.write("ERROR -> test 3: watchpoint set but not supposed to be hit")
|
||||
_ = debugger_backend.remove_watchpoint(watchpoint_id=2)
|
||||
|
||||
# test 4: weight change watchpoint set and hit
|
||||
param_abs_mean_update_ratio_gt = d.Parameter(
|
||||
name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
|
||||
param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
|
||||
"Parameter[6]_11/fc3.bias":
|
||||
{"device_id": [0], "root_graph_id": [0],
|
||||
"is_parameter": True
|
||||
}}, parameter_list=[param_abs_mean_update_ratio_gt,
|
||||
param_epsilon])
|
||||
|
||||
watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
|
||||
if len(watchpoint_hits_test_4) != 1:
|
||||
f_write.write("ERROR -> test 4: watchpoint weight change set but not hit just once")
|
||||
print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)
|
||||
f_write.close()
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
|
||||
"""Print watchpoint hits."""
|
||||
for x, _ in enumerate(watchpoint_hits):
|
||||
f_write.write("-----------------------------------------------------------\n")
|
||||
f_write.write("watchpoint_hit for test_%u attributes:" % test_id + "\n")
|
||||
f_write.write("name = " + watchpoint_hits[x].name + "\n")
|
||||
f_write.write("slot = " + str(watchpoint_hits[x].slot) + "\n")
|
||||
f_write.write("condition = " + str(watchpoint_hits[x].condition) + "\n")
|
||||
f_write.write("watchpoint_id = " + str(watchpoint_hits[x].watchpoint_id) + "\n")
|
||||
for p, _ in enumerate(watchpoint_hits[x].parameters):
|
||||
f_write.write("parameter " + str(p) + " name = " +
|
||||
watchpoint_hits[x].parameters[p].name + "\n")
|
||||
f_write.write("parameter " + str(p) + " disabled = " +
|
||||
str(watchpoint_hits[x].parameters[p].disabled) + "\n")
|
||||
f_write.write("parameter " + str(p) + " value = " +
|
||||
str(watchpoint_hits[x].parameters[p].value) + "\n")
|
||||
f_write.write("parameter " + str(p) + " hit = " +
|
||||
str(watchpoint_hits[x].parameters[p].hit) + "\n")
|
||||
f_write.write("parameter " + str(p) + " actual_value = " +
|
||||
str(watchpoint_hits[x].parameters[p].actual_value) + "\n")
|
||||
f_write.write("error code = " + str(watchpoint_hits[x].error_code) + "\n")
|
||||
f_write.write("device_id = " + str(watchpoint_hits[x].device_id) + "\n")
|
||||
f_write.write("root_graph_id = " + str(watchpoint_hits[x].root_graph_id) + "\n")
|
|
@@ -1,89 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
import pytest
|
||||
from dump_test_utils import compare_actual_with_expected
|
||||
from tests.security_utils import security_off_wrap
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_true_read_tensors"
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
@pytest.mark.skip(reason="needs updating")
|
||||
@security_off_wrap
|
||||
def test_sync_trans_true_read_tensors():
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="/home/workspace/mindspore_dataset/dumps/sync_trans_true/alexnet/")
|
||||
|
||||
_ = debugger_backend.initialize(
|
||||
net_name="Network Name goes here!", is_sync_mode=True)
|
||||
|
||||
# parameter
|
||||
info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
|
||||
# output tensor with zero slot
|
||||
info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
# output tensor with non-zero slot
|
||||
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353",
|
||||
slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
|
||||
tensor_info = [info1, info2, info3]
|
||||
|
||||
tensor_data = debugger_backend.read_tensors(tensor_info)
|
||||
|
||||
print_read_tensors(tensor_info, tensor_data)
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
|
||||
"""Print read tensors."""
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
for x, _ in enumerate(tensor_info):
|
||||
f_write.write("-----------------------------------------------------------\n")
|
||||
f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("node name = " + tensor_info[x].node_name + "\n")
|
||||
f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
|
||||
f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
|
||||
f_write.write("device_id = " + str(tensor_info[x].device_id) + "\n")
|
||||
f_write.write("root_graph_id = " + str(tensor_info[x].root_graph_id) + "\n")
|
||||
f_write.write("is_parameter = " + str(tensor_info[x].is_parameter) + "\n")
|
||||
f_write.write("\n")
|
||||
f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("data (printed in uint8) = " + str(np.frombuffer(
|
||||
tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
|
||||
py_byte_size = len(tensor_data[x].data_ptr)
|
||||
c_byte_size = tensor_data[x].data_size
|
||||
if c_byte_size != py_byte_size:
|
||||
f_write.write("The python byte size of " + str(py_byte_size) +
|
||||
" does not match the C++ byte size of " + str(c_byte_size) + "\n")
|
||||
f_write.write("size in bytes = " + str(tensor_data[x].data_size) + "\n")
|
||||
f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
|
||||
f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
|
||||
f_write.close()
|
|
@@ -0,0 +1,204 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import tempfile
|
||||
import numpy as np
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import pytest
|
||||
from tests.security_utils import security_off_wrap
|
||||
from dump_test_utils import build_dump_structure
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
watchpoint_hits_json = []
|
||||
|
||||
|
||||
def run_watchpoints(is_sync):
|
||||
if is_sync:
|
||||
test_name = "sync_watchpoints"
|
||||
else:
|
||||
test_name = "async_watchpoints"
|
||||
|
||||
    name1 = "Conv2D.Conv2D-op369.0.0.1"
    tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
                         [-1.3931e-02, 8.9359e-04, -1.1520e-02],
                         [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
                        [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
                         [-2.7866e-03, 2.5487e-04, 8.4782e-04],
                         [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
                        [[1.3914e-03, 6.0844e-04, 1.0643e-03],
                         [-2.0966e-02, -1.2865e-03, -1.8692e-03],
                         [-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
                         slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)

    name2 = "Parameter.fc2.bias.0.0.2"
    tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                        2.1177532e-07, 2.9952851e-06], np.float32)
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                         "Parameter[6]_11/fc2.bias",
                         slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)

    tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                        -5.1546101e-07, 6.0798648e-06], np.float32)
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                         "Parameter[6]_11/fc2.bias",
                         slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)

    tensor_info = [info1, info2, info3]
    tensor_name = [name1, name2, name2]
    tensor_list = [tensor1, tensor2, tensor3]

    pwd = os.getcwd()
    with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir:
        temp_dir = build_dump_structure(tmp_dir, tensor_name, tensor_list, "Test", tensor_info)

        debugger_backend = d.DbgServices(dump_file_path=temp_dir)
        debugger_backend.initialize(net_name="Test", is_sync_mode=is_sync)

        # NOTES:
        # -> watch_condition=6 is MIN_LT
        # -> watch_condition=18 is CHANGE_TOO_LARGE

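        # MIN_LT presumably fires when the watched tensor's minimum drops below the "param"
        # threshold: 0.0 hits in test 1 (tensor1 has negative entries), -1000.0 in test 3 does not.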
        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/"
                                                         "conv1-Conv2d/Conv2D-op369":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                          }}, parameter_list=[param1])

        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test_1) == 1
        if GENERATE_GOLDEN:
            print_watchpoint_hits(watchpoint_hits_test_1, 0, False, test_name)
        else:
            compare_expect_actual_result(watchpoint_hits_test_1, 0, test_name)

        # test 2: watchpoint remove and ensure it's not hit
        debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test_2

        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/"
                                                         "conv1-Conv2d/Conv2D-op369":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                          }}, parameter_list=[param2])

        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test_3
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

        # test 4: weight change watchpoint set and hit
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                         "Parameter[6]_11/fc2.bias":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": True
                                                          }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                              param_epsilon])

        watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
        assert len(watchpoint_hits_test_4) == 1

        if GENERATE_GOLDEN:
            print_watchpoint_hits(watchpoint_hits_test_4, 1, True, test_name)
        else:
            compare_expect_actual_result(watchpoint_hits_test_4, 1, test_name)

@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_sync_watchpoints():
    run_watchpoints(True)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_watchpoints():
    run_watchpoints(False)

def compare_expect_actual_result(watchpoint_hits_list, test_index, test_name):
    """Compare actual result with golden file."""
    pwd = os.getcwd()
    golden_file = os.path.realpath(os.path.join(pwd, "golden", test_name + "_expected.json"))
    with open(golden_file) as f:
        expected_list = json.load(f)
        for x, watchpoint_hits in enumerate(watchpoint_hits_list):
            test_id = "watchpoint_hit" + str(test_index + x + 1)
            info = expected_list[x + test_index][test_id]
            assert watchpoint_hits.name == info['name']
            assert watchpoint_hits.slot == info['slot']
            assert watchpoint_hits.condition == info['condition']
            assert watchpoint_hits.watchpoint_id == info['watchpoint_id']
            assert watchpoint_hits.error_code == info['error_code']
            assert watchpoint_hits.rank_id == info['rank_id']
            assert watchpoint_hits.root_graph_id == info['root_graph_id']
            for p, _ in enumerate(watchpoint_hits.parameters):
                parameter = "parameter" + str(p)
                assert watchpoint_hits.parameters[p].name == info['parameter'][p][parameter]['name']
                assert watchpoint_hits.parameters[p].disabled == info['parameter'][p][parameter]['disabled']
                assert watchpoint_hits.parameters[p].value == info['parameter'][p][parameter]['value']
                assert watchpoint_hits.parameters[p].hit == info['parameter'][p][parameter]['hit']
                assert watchpoint_hits.parameters[p].actual_value == info['parameter'][p][parameter]['actual_value']

def print_watchpoint_hits(watchpoint_hits_list, test_index, is_print, test_name):
    """Print watchpoint hits."""
    for x, watchpoint_hits in enumerate(watchpoint_hits_list):
        parameter_json = []
        for p, _ in enumerate(watchpoint_hits.parameters):
            parameter = "parameter" + str(p)
            parameter_json.append({
                parameter: {
                    'name': watchpoint_hits.parameters[p].name,
                    'disabled': watchpoint_hits.parameters[p].disabled,
                    'value': watchpoint_hits.parameters[p].value,
                    'hit': watchpoint_hits.parameters[p].hit,
                    'actual_value': watchpoint_hits.parameters[p].actual_value
                }
            })
        watchpoint_hit = "watchpoint_hit" + str(test_index + x + 1)
        watchpoint_hits_json.append({
            watchpoint_hit: {
                'name': watchpoint_hits.name,
                'slot': watchpoint_hits.slot,
                'condition': watchpoint_hits.condition,
                'watchpoint_id': watchpoint_hits.watchpoint_id,
                'parameter': parameter_json,
                'error_code': watchpoint_hits.error_code,
                'rank_id': watchpoint_hits.rank_id,
                'root_graph_id': watchpoint_hits.root_graph_id
            }
        })
    if is_print:
        with open(test_name + "_expected.json", "w") as dump_f:
            json.dump(watchpoint_hits_json, dump_f, indent=4, separators=(',', ': '))