!23949 Add st offline debugger test

Merge pull request !23949 from sabrinasun_59ee/sttest
This commit is contained in:
i-robot 2021-09-29 22:01:19 +00:00 committed by Gitee
commit e2b07117e1
17 changed files with 1001 additions and 756 deletions

View File

@ -16,15 +16,28 @@
Utils for testing offline debugger.
"""
import filecmp
import os
import tempfile
import numpy as np
def compare_actual_with_expected(test_name):
    """Compare a test's ``.actual`` output with its golden ``.expected`` file.

    The golden file is read from ``<cwd>/golden/<test_name>.expected`` and the
    actual file from ``<test_name>.actual`` (relative to the current working
    directory). The ``.actual`` file is removed afterwards, whether the
    comparison succeeded, failed or raised.

    Args:
        test_name (str): Base name shared by the golden and the actual file.

    Returns:
        bool: True if both files have the same content, False otherwise.

    Raises:
        OSError: If one of the two files cannot be read (e.g. it is missing).
    """
    expected_file = os.path.join(os.getcwd(), "golden", test_name + ".expected")
    actual_file = test_name + ".actual"
    try:
        # shallow=False compares file contents instead of trusting matching os.stat() signatures.
        return filecmp.cmp(expected_file, actual_file, shallow=False)
    finally:
        # Remove the output even when the comparison raises (e.g. missing golden
        # file), so a stale .actual file is never left behind for a later run.
        if os.path.exists(actual_file):
            os.remove(actual_file)
def build_dump_structure(path, tensor_name_list, tensor_list, net_name, tensor_info_list):
    """Build a dump directory tree holding one ``.npy`` file per tensor.

    A fresh directory (prefixed with ``net_name``) is created under ``path``.
    Each tensor is saved to
    ``<temp_dir>/rank_<rank_id>/<net_name>/<root_graph_id>/<iteration>/`` in a
    uniquely named file that starts with the tensor name and ends with
    ``.output.<slot>.DefaultFormat.npy`` or ``.input.<slot>.DefaultFormat.npy``.

    Args:
        path (str): Existing directory in which the dump root is created.
        tensor_name_list (list[str]): File name prefix for each tensor.
        tensor_list (list): Arrays to save, index-aligned with the names.
        net_name (str): Network name; used as the dump root prefix and as a
            directory level.
        tensor_info_list (list): Objects providing ``slot``, ``iteration``,
            ``rank_id``, ``root_graph_id`` and ``is_output``, index-aligned
            with the tensors.

    Returns:
        str: Path of the created dump root directory.
    """
    temp_dir = tempfile.mkdtemp(prefix=net_name, dir=path)
    for tensor_name, tensor, tensor_info in zip(tensor_name_list, tensor_list, tensor_info_list):
        dump_dir = os.path.join(temp_dir,
                                "rank_" + str(tensor_info.rank_id),
                                net_name,
                                str(tensor_info.root_graph_id),
                                str(tensor_info.iteration))
        os.makedirs(dump_dir, exist_ok=True)
        direction = "output" if str(tensor_info.is_output) == "True" else "input"
        suffix = "." + direction + "." + str(tensor_info.slot) + ".DefaultFormat.npy"
        file_descriptor, _ = tempfile.mkstemp(prefix=tensor_name, suffix=suffix, dir=dump_dir)
        # mkstemp() hands back an already-open descriptor; writing through it
        # guarantees it is closed instead of leaking one descriptor per tensor.
        with os.fdopen(file_descriptor, "wb") as dump_file:
            np.save(dump_file, tensor)
    return temp_dir

View File

@ -0,0 +1,253 @@
[
{
"tensor_1": {
"tensor_info": {
"node_name": "Default/CudnnUniformReal-op391",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
66,
0,
0,
128,
69
],
"size_in_bytes": 8,
"debugger_dtype": 11,
"shape": [
2
]
}
}
},
{
"tensor_2": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
0,
0,
0,
0,
66,
0,
0,
128,
69,
0,
0,
144,
64,
195,
245,
216,
64,
0,
0,
48,
193
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
]
}
}
},
{
"tensor_3": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424",
"slot": 0,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
8,
255,
166,
56,
189,
58,
71,
56,
103,
3,
217,
55,
170,
225,
174,
56,
135,
195,
82,
56,
54,
253,
225,
55,
254,
158,
179,
56,
33,
66,
88,
56,
30,
248,
222,
55,
241,
32,
168,
56,
143,
126,
73,
56,
116,
129,
228,
55,
53,
254,
175,
56,
2,
0,
87,
56,
246,
124,
238,
55,
177,
160,
180,
56,
156,
126,
92,
56,
144,
121,
236,
55,
117,
189,
159,
56,
25,
132,
32,
56,
154,
1,
178,
54,
187,
189,
156,
56,
117,
252,
27,
56,
205,
2,
76,
54,
212,
127,
148,
56,
129,
1,
12,
56,
53,
253,
11,
182
],
"size_in_bytes": 108,
"debugger_dtype": 11,
"shape": [
3,
3,
3
]
}
}
},
{
"tensor_4": {
"tensor_info": {
"node_name": "Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
"slot": 1,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
104,
60,
33,
79,
53,
6,
131,
78,
78,
232,
126,
79,
154,
198,
85,
79,
245,
52,
84,
78,
70,
207,
222,
78
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
6
]
}
}
}
]

View File

@ -1,28 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
slot = 0
iteration = 2
device_id = None
root_graph_id = 1
is_parameter = False
tensor_data_1 attributes:
data (printed in uint8) = [149 167 122 ... 160 212 164]
size in bytes = 2076672
debugger dtype = 10
shape = [32, 12, 13, 13, 16]
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/ReLUV2-op348
slot = 1
iteration = 2
device_id = None
root_graph_id = 1
is_parameter = False
tensor_data_2 attributes:
data (printed in uint8) = [ 20 21 18 ... 126 98 25]
size in bytes = 129792
debugger dtype = 6
shape = [32, 12, 13, 13, 2]

View File

@ -1,14 +0,0 @@
-----------------------------------------------------------
watchpoint_hit for test_1 attributes:
name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
slot = 0
condition = 6
watchpoint_id = 1
parameter 0 name = param
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = -0.1417236328125
error code = 0
device_id = 0
root_graph_id = 1

View File

@ -0,0 +1,55 @@
[
{
"watchpoint_hit1": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
"slot": 1,
"condition": 6,
"watchpoint_id": 1,
"parameter": [
{
"parameter0": {
"name": "param",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": -0.020966000854969025
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
},
{
"watchpoint_hit2": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
"slot": 0,
"condition": 18,
"watchpoint_id": 3,
"parameter": [
{
"parameter0": {
"name": "abs_mean_update_ratio_gt",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": 1.0156775705209766
}
},
{
"parameter1": {
"name": "epsilon",
"disabled": true,
"value": 0.0,
"hit": false,
"actual_value": 0.0
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
}
]

View File

@ -0,0 +1,253 @@
[
{
"tensor_1": {
"tensor_info": {
"node_name": "Default/CudnnUniformReal-op391",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
66,
0,
0,
128,
69
],
"size_in_bytes": 8,
"debugger_dtype": 11,
"shape": [
2
]
}
}
},
{
"tensor_2": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
0,
0,
0,
0,
66,
0,
0,
128,
69,
0,
0,
144,
64,
195,
245,
216,
64,
0,
0,
48,
193
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
]
}
}
},
{
"tensor_3": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424",
"slot": 0,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
8,
255,
166,
56,
189,
58,
71,
56,
103,
3,
217,
55,
170,
225,
174,
56,
135,
195,
82,
56,
54,
253,
225,
55,
254,
158,
179,
56,
33,
66,
88,
56,
30,
248,
222,
55,
241,
32,
168,
56,
143,
126,
73,
56,
116,
129,
228,
55,
53,
254,
175,
56,
2,
0,
87,
56,
246,
124,
238,
55,
177,
160,
180,
56,
156,
126,
92,
56,
144,
121,
236,
55,
117,
189,
159,
56,
25,
132,
32,
56,
154,
1,
178,
54,
187,
189,
156,
56,
117,
252,
27,
56,
205,
2,
76,
54,
212,
127,
148,
56,
129,
1,
12,
56,
53,
253,
11,
182
],
"size_in_bytes": 108,
"debugger_dtype": 11,
"shape": [
3,
3,
3
]
}
}
},
{
"tensor_4": {
"tensor_info": {
"node_name": "Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
"slot": 1,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
104,
60,
33,
79,
53,
6,
131,
78,
78,
232,
126,
79,
154,
198,
85,
79,
245,
52,
84,
78,
70,
207,
222,
78
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
6
]
}
}
}
]

View File

@ -1,73 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
slot = 0
iteration = 2
device_id = None
root_graph_id = 0
is_parameter = True
tensor_data_1 attributes:
data (printed in uint8) = [170 19 44 181 254 212 16 52 52 162 148 180 130 115 226 180 183 243
101 52 224 79 189 51 10 70 69 51 199 75 159 52 79 98 104 52
106 77 19 52 129 183 8 180 252 58 48 180 35 219 9 52 240 201
179 51 142 151 158 51 210 145 182 53 140 219 0 53 140 219 22 181
46 33 87 180 238 90 122 180 166 10 38 179 202 195 4 53 166 10
150 51 214 120 209 52 235 115 37 180 92 177 215 180 0 136 84 51
72 114 145 180 43 169 255 180 114 27 61 52 76 225 122 50 126 72
159 51 58 35 202 51 114 61 106 51 60 223 63 52 209 179 1 52
232 217 44 178 130 158 109 179 213 231 10 179 37 40 94 179 208 68
64 53 6 52 249 52 162 35 1 181 231 29 155 52 30 201 69 180
229 131 126 51 18 165 109 180 164 112 163 181 116 172 11 178 6 129
37 52 54 205 203 180 115 104 145 52 232 106 219 179 36 40 214 52
202 50 204 52 76 89 38 179 230 140 232 178 168 53 77 52 180 191
108 51 128 183 64 51 56 137 161 180 247 6 143 180 126 63 197 180
198 177 94 52 140 185 139 51 150 178 228 180 255 67 150 52 134 201
164 52 107 43 14 53 174 216 63 179 40 160 41 53 120 88 72 179
218 172 234 52 234 38 25 52 85 159 155 180 254 67 138 180 34 253
118 180 218 61 17 52 242 133 253 52 175 37 180 52 171 62 163 52
202 195 86 53 160 171 45 52 34 31 176 180 156 85 5 53 178 191
68 180 42 203 140 52 248 117 72 52 248 253 212 176 195 100 202 51
87 14 141 52 91 100 235 51 48 221 136 52 143 117 17 180 51 196
25 52 127 29 112 180 152 144 207 178 219 104 64 52 21 174 251 52
164 78 138 181 20 63 6 52 10 249 96 179 163 146 18 53 200 186
236 52 2 188 85 52 124 140 121 179 246 185 22 181 246 74 249 51
70 182 135 53 189 227 76 52 249 160 159 180 134 235 65 53 64 164
255 51 224 156 41 53 142 117 69 181 247 151 101 53 185 175 35 52
164 112 21 53 30 31 212 179 142 151 110 179 176 148 29 181 206 204
88 53 116 215 214 180 172 173 216 51 106 222 153 180 200 152 19 181
176 3 7 52 215 52 87 52]
size in bytes = 512
debugger dtype = 11
shape = [128]
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
slot = 0
iteration = 2
device_id = None
root_graph_id = 0
is_parameter = False
tensor_data_2 attributes:
data (printed in uint8) = [181 167 46 26 122 155 141 164 212 39 111 27 247 156 1 152 189 36
15 161 254 167 82 163 33 42 101 158 225 161 24 167 103 140 45 42
178 170 173 29 48 42 39 32 56 25 216 170 128 41 216 23 153 154
39 173 193 42 84 160 111 22 61 144]
size in bytes = 64
debugger dtype = 10
shape = [2, 2, 2, 2, 2]
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346
slot = 1
iteration = 2
device_id = None
root_graph_id = 0
is_parameter = False
tensor_data_3 attributes:
data (printed in uint8) = [ 50 17 122 ... 94 42 90]
size in bytes = 129792
debugger dtype = 6
shape = [32, 12, 13, 13, 2]

View File

@ -1,33 +0,0 @@
-----------------------------------------------------------
watchpoint_hit for test_1 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
slot = 0
condition = 6
watchpoint_id = 1
parameter 0 name = param
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = -0.08050537109375
error code = 0
device_id = 0
root_graph_id = 0
-----------------------------------------------------------
watchpoint_hit for test_4 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
slot = 0
condition = 18
watchpoint_id = 3
parameter 0 name = abs_mean_update_ratio_gt
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = 0.5243796973599475
parameter 1 name = epsilon
parameter 1 disabled = True
parameter 1 value = 0.0
parameter 1 hit = False
parameter 1 actual_value = 0.0
error code = 0
device_id = 0
root_graph_id = 0

View File

@ -1,98 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
slot = 0
iteration = 2
device_id = None
root_graph_id = 0
is_parameter = True
tensor_data_1 attributes:
data (printed in uint8) = [230 208 10 52 104 34 252 52 4 231 144 52 188 150 64 180 88 236
15 180 254 135 180 51 131 226 147 52 88 202 62 53 2 43 55 53
231 29 87 180 220 249 30 180 157 17 177 180 81 107 140 181 8 95
192 180 89 134 112 180 96 238 90 178 156 196 212 180 206 25 15 181
212 154 6 180 91 211 116 52 191 14 140 51 128 106 124 53 28 158
70 181 182 21 251 50 100 204 157 179 88 202 42 180 7 95 8 53
128 251 238 52 241 133 241 52 111 86 157 179 48 221 148 180 200 7
141 180 236 226 182 51 190 82 158 180 140 108 179 180 195 134 215 179
103 213 39 179 89 168 149 180 42 58 58 180 64 53 62 179 250 126
158 52 38 83 117 52 0 0 136 180 136 133 122 51 110 18 131 179
238 13 94 51 102 136 15 181 134 90 227 180 16 11 117 180 35 74
163 52 105 0 87 181 112 18 131 50 226 233 67 181 217 172 10 52
206 25 217 52 208 213 22 52 146 203 87 180 74 46 207 52 178 191
4 180 100 93 216 52 119 190 171 180 223 2 5 181 128 72 207 179
58 146 11 179 224 79 137 52 143 228 154 180 246 219 215 179 14 79
195 52 126 29 64 52 132 192 42 51 94 220 86 52 94 109 1 181
72 37 117 178 110 197 94 180 160 94 153 179 118 224 80 181 156 17
37 50 120 156 162 53 26 115 135 180 228 20 29 53 145 126 147 52
99 16 48 180 211 188 199 180 52 51 99 180 93 254 227 52 152 126
123 49 6 18 16 181 5 163 130 51 27 158 98 53 134 235 189 52
119 45 9 180 130 115 110 52 158 128 162 52 232 251 197 180 178 46
158 179 57 214 157 52 172 207 161 180 208 0 222 49 242 99 32 53
20 174 135 50 247 117 176 52 194 57 43 180 140 108 135 51 243 65
175 51 187 73 156 51 63 232 217 50 180 234 115 52 194 168 148 52
27 192 183 180 45 178 157 52 125 208 17 53 236 192 65 53 190 193
7 53 254 246 57 53 3 43 199 51 64 164 215 180 220 104 240 51
23 72 24 180 68 173 9 51 72 114 29 53 105 0 57 181 188 150
8 53 229 97 131 53 0 34 189 51 163 146 74 53 31 244 204 51
86 193 220 180 156 51 146 179]
size in bytes = 512
debugger dtype = 11
shape = [128]
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171
slot = 0
iteration = 2
device_id = None
root_graph_id = 0
is_parameter = False
tensor_data_2 attributes:
data (printed in uint8) = [ 99 26 69 41 190 38 128 38 232 38 16 39 5 39 24 39 1 39
218 38 219 38 43 39 241 33 21 165 159 32 15 145 191 28 66 30
110 30 149 31 14 29 179 29 249 28 94 29 141 156 210 36 143 166
201 162 5 165 54 166 100 165 57 165 81 165 25 166 150 165 236 164
20 164 238 165 170 20 200 168 16 168 36 169 9 169 195 168 64 168
248 168 10 169 20 168 56 167 137 167 124 168 221 152 35 168 163 167
110 169 147 168 198 167 52 168 91 168 14 168 30 168 240 167 171 168
235 168 37 161 222 165 16 161 88 164 68 162 156 152 109 151 181 156
0 152 84 158 112 154 193 161 13 162 172 28 38 163 16 31 255 26
102 21 64 31 177 28 102 156 77 20 62 25 177 26 26 22 241 24
188 33 149 160 67 36 171 35 38 36 68 34 148 19 54 162 53 161
174 156 195 134 139 24 210 35 175 36 206 158 136 37 88 36 31 36
78 20 203 159 6 165 235 163 83 162 7 157 76 31 240 35 38 37
20 160 193 38 130 29 95 23 177 161 143 162 46 165 103 164 106 163
167 162 36 158 130 161 149 33 171 157 138 37 252 27 198 164 116 166
60 165 36 165 47 165 150 166 188 166 112 167 58 166 33 140 141 163
93 32 38 159 13 168 194 166 78 166 8 166 201 165 115 166 128 166
77 166 29 166 131 157 150 31 46 32 124 164 239 166 219 165 96 166
216 166 21 167 28 167 35 167 237 165 202 164 57 32 75 26 208 40
148 40 205 40 162 40 187 40 181 40 181 40 155 40 124 40 129 40
157 40 186 29 253 32 138 44 226 43 43 43 237 42 164 42 137 42
174 42 179 42 160 42 104 42 30 42 53 38 140 25 240 44 120 44
236 42 19 43 143 42 6 42 181 41 83 42 0 43 112 42 97 41
27 32 177 32 254 44 105 43 242 40 239 40 71 41 223 40 237 40
93 41 22 41 211 40 227 40 187 20 71 30 4 44 188 40 79 36
133 38 62 39 209 38 15 38 83 38 136 38 146 38 100 37 118 152
185 149 165 42 99 41 61 36 241 37 34 38 170 38 62 38 69 39
215 39 128 39 49 38 54 33 141 161 184 41 34 40 100 36 230 37
133 38 57 37 224 35 7 37]
size in bytes = 512
debugger dtype = 10
shape = [4, 4, 4, 4]
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353
slot = 1
iteration = 2
device_id = None
root_graph_id = 0
is_parameter = False
tensor_data_3 attributes:
data (printed in uint8) = [19 17 27 ... 94 42 90]
size in bytes = 129792
debugger dtype = 6
shape = [32, 12, 13, 13, 2]

View File

@ -0,0 +1,55 @@
[
{
"watchpoint_hit1": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
"slot": 1,
"condition": 6,
"watchpoint_id": 1,
"parameter": [
{
"parameter0": {
"name": "param",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": -0.020966000854969025
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
},
{
"watchpoint_hit2": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
"slot": 0,
"condition": 18,
"watchpoint_id": 3,
"parameter": [
{
"parameter0": {
"name": "abs_mean_update_ratio_gt",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": 1.0156775705209766
}
},
{
"parameter1": {
"name": "epsilon",
"disabled": true,
"value": 0.0,
"hit": false,
"actual_value": 0.0
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
}
]

View File

@ -1,86 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
# When True, print_read_tensors() writes "<test_name>.expected" and the comparison step is skipped;
# when False, it writes "<test_name>.actual", which is then compared with the golden file.
GENERATE_GOLDEN = False
# Base file name (without extension) of this test's .expected / .actual files.
test_name = "async_sink_mode_true_read_tensors"
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_async_sink_mode_true_read_tensors():
    """Read two output tensors from the async_sink_true dump and check them against the golden file."""
    backend = d.DbgServices(
        dump_file_path="/home/workspace/mindspore_dataset/dumps/async_sink_true/")
    backend.initialize(net_name="alexnet", is_sync_mode=False)

    # (node name, slot) of every tensor to read back.
    requested = [
        # output tensor with zero slot
        ("Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
         "conv3-Conv2d/Conv2D-op169", 0),
        # output tensor with non-zero slot
        ("Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
         "ReLUV2-op348", 1),
    ]
    tensor_info = [d.TensorInfo(node_name=node_name, slot=slot, iteration=2, device_id=0,
                                root_graph_id=1, is_parameter=False)
                   for node_name, slot in requested]

    tensor_data = backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)

    # Nothing to compare against while the golden file itself is being generated.
    if GENERATE_GOLDEN:
        return
    assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Write the requested tensor infos and the data read back for them to a text file.

    The output goes to ``<test_name>.expected`` when ``GENERATE_GOLDEN`` is set
    and to ``<test_name>.actual`` otherwise.

    Args:
        tensor_info (list): Tensor descriptors; ``node_name``, ``slot``,
            ``iteration``, ``device_id``, ``root_graph_id`` and ``is_parameter``
            are read from each entry.
        tensor_data (list): Read results, index-aligned with ``tensor_info``;
            ``data_ptr``, ``data_size``, ``dtype`` and ``shape`` are read.

    Raises:
        IndexError: If ``tensor_data`` has fewer entries than ``tensor_info``.
    """
    extension = ".expected" if GENERATE_GOLDEN else ".actual"
    # The context manager closes the file even if formatting one of the entries raises.
    with open(test_name + extension, "w") as f_write:
        for index, info in enumerate(tensor_info):
            data = tensor_data[index]
            f_write.write("-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(index + 1) + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(index + 1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            # Record a mismatch between the Python buffer length and the size reported by the backend.
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")

View File

@ -1,109 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import mindspore.offline_debug.dbg_services as d
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
# When True, the test writes its log to "<test_name>.expected" and skips the comparison;
# when False, the log goes to "<test_name>.actual" and is compared with the golden file.
GENERATE_GOLDEN = False
# Base file name (without extension) of this test's .expected / .actual files.
test_name = "async_sink_mode_true_watchpoints"
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_async_sink_mode_true_watchpoints():
    """Set, hit and remove watchpoints on the async_sink_true dump and verify the resulting log.

    The log is written to ``<test_name>.expected`` when ``GENERATE_GOLDEN`` is
    set, otherwise to ``<test_name>.actual``, which is then checked with
    ``compare_actual_with_expected``.
    """
    output_file = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # Node watched by test 1 and test 3.
    conv3_node = ("Default/network-TrainOneStepCell/network-WithLossCell/"
                  "_backbone-AlexNet/conv3-Conv2d/Conv2D-op169")

    # The context manager closes the log even if one of the debugger calls raises.
    with open(output_file, "w") as f_write:
        debugger_backend = d.DbgServices(
            dump_file_path="/home/workspace/mindspore_dataset/dumps/async_sink_true/")
        _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=False)

        # NOTES:
        # -> watch_condition=6 is MIN_LT
        # -> watch_condition=18 is CHANGE_TOO_LARGE

        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={conv3_node:
                                                             {"device_id": [0], "root_graph_id": [1],
                                                              "is_parameter": False
                                                              }}, parameter_list=[param1])
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        if len(watchpoint_hits_test_1) != 1:
            f_write.write("ERROR -> test 1: watchpoint set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)

        # test 2: watchpoint remove and ensure it's not hit
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_2:
            f_write.write("ERROR -> test 2: watchpoint removed but hit\n")

        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={conv3_node:
                                                             {"device_id": [0], "root_graph_id": [1],
                                                              "is_parameter": False
                                                              }}, parameter_list=[param2])
        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_3:
            f_write.write("ERROR -> test 3: watchpoint set but not supposed to be hit\n")
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
    """Write a textual report of every watchpoint hit to ``f_write``.

    Args:
        watchpoint_hits (list): Hits to report; ``name``, ``slot``,
            ``condition``, ``watchpoint_id``, ``parameters``, ``error_code``,
            ``device_id`` and ``root_graph_id`` are read from each hit, and
            ``name``, ``disabled``, ``value``, ``hit`` and ``actual_value``
            from each of its parameters.
        test_id (int): Number of the sub-test, embedded in each hit's header line.
        f_write: Writable text stream receiving the report.
    """
    # Iterate over the hits and parameters directly instead of re-indexing the
    # lists through enumerate(); the index is only needed for the parameter label.
    for hit in watchpoint_hits:
        f_write.write("-----------------------------------------------------------\n")
        f_write.write("watchpoint_hit for test_%u attributes:" % test_id + "\n")
        f_write.write("name = " + hit.name + "\n")
        f_write.write("slot = " + str(hit.slot) + "\n")
        f_write.write("condition = " + str(hit.condition) + "\n")
        f_write.write("watchpoint_id = " + str(hit.watchpoint_id) + "\n")
        for index, parameter in enumerate(hit.parameters):
            label = "parameter " + str(index)
            f_write.write(label + " name = " + parameter.name + "\n")
            f_write.write(label + " disabled = " + str(parameter.disabled) + "\n")
            f_write.write(label + " value = " + str(parameter.value) + "\n")
            f_write.write(label + " hit = " + str(parameter.hit) + "\n")
            f_write.write(label + " actual_value = " + str(parameter.actual_value) + "\n")
        f_write.write("error code = " + str(hit.error_code) + "\n")
        f_write.write("device_id = " + str(hit.device_id) + "\n")
        f_write.write("root_graph_id = " + str(hit.root_graph_id) + "\n")

View File

@ -0,0 +1,159 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import os
import json
import tempfile
import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from tests.security_utils import security_off_wrap
from dump_test_utils import build_dump_structure
# When True, run_read_tensors() dumps the results to "<test_name>_expected.json" through
# print_read_tensors() instead of comparing them with the golden file.
GENERATE_GOLDEN = False
# Entries appended by print_read_tensors(); the list is module-level, so it keeps growing across calls.
tensor_json = []
def run_read_tensors(is_sync):
    """Dump four synthetic tensors, read them back through DbgServices and verify (or record) them.

    Args:
        is_sync (bool): Forwarded to ``DbgServices.initialize`` as ``is_sync_mode``; it also
            selects the test name (``sync_read_tensors`` or ``async_read_tensors``).
    """
    test_name = "sync_read_tensors" if is_sync else "async_read_tensors"

    # One (file name prefix, tensor, tensor info) triple per dumped tensor.
    cases = [
        # input tensor with zero slot
        ("CudnnUniformReal.CudnnUniformReal-op391.0.0.",
         np.array([32.0, 4096.0], np.float32),
         d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                      slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)),
        # input tensor with non-zero slot
        ("ReluGradV2.ReluGradV2-op406.0.0.",
         np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32),
         d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                                "gradReLU/ReluGradV2-op406",
                      slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)),
        # output tensor with zero slot
        ("Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0.",
         np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
                    [8.339e-05, 5.025e-05, 2.694e-05],
                    [8.565e-05, 5.156e-05, 2.658e-05]],
                   [[8.017e-05, 4.804e-05, 2.724e-05],
                    [8.392e-05, 5.126e-05, 2.843e-05],
                    [8.613e-05, 5.257e-05, 2.819e-05]],
                   [[7.617e-05, 3.827e-05, 5.305e-06],
                    [7.474e-05, 3.719e-05, 3.040e-06],
                    [7.081e-05, 3.338e-05, -2.086e-06]]], np.float32),
         d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
                                "gradConv2D/Conv2DBackpropFilter-op424",
                      slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)),
        # output tensor with non-zero slot
        ("ReLUV2.ReLUV2-op381.0.0.",
         np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32),
         d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
                      slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)),
    ]
    names = [case[0] for case in cases]
    tensors = [case[1] for case in cases]
    infos = [case[2] for case in cases]

    # Everything that touches the dump files stays inside the context: the
    # temporary directory is deleted as soon as the block is left.
    with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmp_dir:
        dump_root = build_dump_structure(tmp_dir, names, tensors, "Test", infos)
        backend = d.DbgServices(dump_file_path=dump_root)
        backend.initialize(net_name="Test", is_sync_mode=is_sync)
        tensor_data = backend.read_tensors(infos)
        if not GENERATE_GOLDEN:
            compare_expect_actual_result(infos, tensor_data, 0, test_name)
        else:
            print_read_tensors(infos, tensor_data, 0, True, test_name)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_sync_read_tensors():
    """Run the read-tensors scenario with is_sync set to True."""
    run_read_tensors(is_sync=True)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_read_tensors():
    """Run the read-tensors scenario with is_sync set to False."""
    run_read_tensors(is_sync=False)
def compare_expect_actual_result(tensor_info_list, tensor_data_list, test_index, test_name):
    """Assert that the tensors read back match the golden JSON file.

    The golden file is ``<cwd>/golden/<test_name>_expected.json``. Entry
    ``test_index + i`` of that file, stored under the key
    ``tensor_<test_index + i + 1>``, is compared with the i-th info/data pair.

    Args:
        tensor_info_list (list): Tensor descriptors; ``node_name``, ``slot``,
            ``iteration``, ``rank_id``, ``root_graph_id`` and ``is_output``
            are compared.
        tensor_data_list (list): Read results, index-aligned with
            ``tensor_info_list``; the bytes of ``data_ptr`` plus ``data_size``,
            ``dtype`` and ``shape`` are compared.
        test_index (int): Offset of the first entry to compare in the golden file.
        test_name (str): Base name of the golden file.

    Raises:
        AssertionError: If the two lists differ in length or any field differs
            from the golden file.
    """
    # zip() stops at the shorter list, so without this check a read that returned
    # too few (or no) tensors would pass without comparing anything.
    assert len(tensor_info_list) == len(tensor_data_list), \
        "got %d tensor data entries for %d tensor infos" % (len(tensor_data_list), len(tensor_info_list))

    golden_file = os.path.realpath(os.path.join(os.getcwd(), "golden", test_name + "_expected.json"))
    with open(golden_file) as f:
        expected_list = json.load(f)

    pairs = zip(tensor_info_list, tensor_data_list)
    for position, (tensor_info, tensor_data) in enumerate(pairs, start=test_index):
        expected = expected_list[position]["tensor_" + str(position + 1)]
        expected_info = expected['tensor_info']
        expected_data = expected['tensor_data']

        assert tensor_info.node_name == expected_info['node_name']
        assert tensor_info.slot == expected_info['slot']
        assert tensor_info.iteration == expected_info['iteration']
        assert tensor_info.rank_id == expected_info['rank_id']
        assert tensor_info.root_graph_id == expected_info['root_graph_id']
        assert tensor_info.is_output == expected_info['is_output']

        # The golden file stores the raw buffer as a list of uint8 values.
        actual_data = np.frombuffer(
            tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
        assert actual_data == expected_data['data']
        assert tensor_data.data_size == expected_data['size_in_bytes']
        assert tensor_data.dtype == expected_data['debugger_dtype']
        assert tensor_data.shape == expected_data['shape']
def print_read_tensors(tensor_info_list, tensor_data_list, test_index, is_print, test_name):
    """Append the read tensors to ``tensor_json`` and optionally dump the list as a golden JSON file.

    Args:
        tensor_info_list (list): Tensor descriptors that were requested.
        tensor_data_list (list): Read results, index-aligned with ``tensor_info_list``.
        test_index (int): Offset added to the position when numbering the ``tensor_<n>`` keys.
        is_print (bool): When true, ``tensor_json`` is written to ``<test_name>_expected.json``.
        test_name (str): Base name of the JSON file to write.
    """
    pairs = zip(tensor_info_list, tensor_data_list)
    for number, (info, result) in enumerate(pairs, start=test_index + 1):
        key = "tensor_" + str(number)
        raw_bytes = np.frombuffer(result.data_ptr, np.uint8, result.data_size).tolist()

        # Report a mismatch between the Python buffer length and the size given by the backend.
        py_byte_size = len(result.data_ptr)
        c_byte_size = result.data_size
        if py_byte_size != c_byte_size:
            print("The python byte size of " + str(py_byte_size) +
                  " does not match the C++ byte size of " + str(c_byte_size) + "\n")

        info_fields = {
            'node_name': info.node_name,
            'slot': info.slot,
            'iteration': info.iteration,
            'rank_id': info.rank_id,
            'root_graph_id': info.root_graph_id,
            'is_output': info.is_output
        }
        data_fields = {
            'data': raw_bytes,
            'size_in_bytes': result.data_size,
            'debugger_dtype': result.dtype,
            'shape': result.shape
        }
        tensor_json.append({key: {'tensor_info': info_fields, 'tensor_data': data_fields}})

    if not is_print:
        return
    with open(test_name + "_expected.json", "w") as dump_f:
        json.dump(tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -1,89 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_trans_false_read_tensors"
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_sync_trans_false_read_tensors():
    """Read three tensors from the sync_trans_false AlexNet dump and check the report."""
    debugger_backend = d.DbgServices(
        dump_file_path="/home/workspace/mindspore_dataset/dumps/sync_trans_false/alexnet/")
    debugger_backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # (node_name, slot, is_parameter) for every tensor that is requested
    requests = (
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias", 0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168", 0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346", 1, False),
    )
    tensor_info = [
        d.TensorInfo(node_name=node, slot=slot, iteration=2, device_id=0,
                     root_graph_id=0, is_parameter=is_param)
        for node, slot, is_param in requests
    ]
    tensor_data = debugger_backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)
    if GENERATE_GOLDEN:
        return
    assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Write the attributes of the read tensors to the golden or actual file.

    The report goes to ``<test_name>.expected`` when ``GENERATE_GOLDEN`` is
    set and to ``<test_name>.actual`` otherwise.

    Args:
        tensor_info: sequence of objects exposing ``node_name``, ``slot``,
            ``iteration``, ``device_id``, ``root_graph_id`` and ``is_parameter``.
        tensor_data: sequence, index-aligned with ``tensor_info``, of objects
            exposing ``data_ptr``, ``data_size``, ``dtype`` and ``shape``.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # The context manager closes the report even if a write or the buffer
    # conversion below raises.
    with open(test_name + suffix, "w") as f_write:
        for x, info in enumerate(tensor_info):
            # Indexed on purpose: a shorter tensor_data must still raise IndexError.
            data = tensor_data[x]
            f_write.write("-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")

View File

@ -1,128 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import mindspore.offline_debug.dbg_services as d
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_trans_false_watchpoints"
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_sync_trans_false_watchpoints():
    """Exercise add/check/remove watchpoint calls on the sync_trans_false AlexNet dump.

    Results are written to ``<test_name>.expected`` when ``GENERATE_GOLDEN``
    is set, otherwise to ``<test_name>.actual``, which is then compared with
    the golden file.
    """
    output_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # The with-block closes the report even when a backend call raises, and
    # guarantees it is flushed before the comparison at the end.
    with open(output_name, "w") as f_write:
        debugger_backend = d.DbgServices(
            dump_file_path="/home/workspace/mindspore_dataset/dumps/sync_trans_false/alexnet/")
        _ = debugger_backend.initialize(
            net_name="Network Name goes here!", is_sync_mode=True)

        # NOTES:
        # -> watch_condition=6 is MIN_LT
        # -> watch_condition=18 is CHANGE_TOO_LARGE

        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                             "Conv2D-op168":
                                                                 {"device_id": [0], "root_graph_id": [0],
                                                                  "is_parameter": False
                                                                  }}, parameter_list=[param1])
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        if len(watchpoint_hits_test_1) != 1:
            f_write.write("ERROR -> test 1: watchpoint set but not hit just once")
        print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)

        # test 2: watchpoint remove and ensure it's not hit
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_2:
            f_write.write("ERROR -> test 2: watchpoint removed but hit")

        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                             "Conv2D-op308":
                                                                 {"device_id": [0], "root_graph_id": [0],
                                                                  "is_parameter": False
                                                                  }}, parameter_list=[param2])
        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_3:
            f_write.write("ERROR -> test 3: watchpoint set but not supposed to be hit")
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

        # test 4: weight change watchpoint set and hit
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                             "Parameter[6]_11/fc3.bias":
                                                                 {"device_id": [0], "root_graph_id": [0],
                                                                  "is_parameter": True
                                                                  }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                                      param_epsilon])
        watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
        if len(watchpoint_hits_test_4) != 1:
            f_write.write("ERROR -> test 4: watchpoint weight change set but not hit just once")
        print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
    """Write a text report of every watchpoint hit to ``f_write``.

    Args:
        watchpoint_hits: iterable of hit objects exposing ``name``, ``slot``,
            ``condition``, ``watchpoint_id``, ``parameters``, ``error_code``,
            ``device_id`` and ``root_graph_id``. Each parameter exposes
            ``name``, ``disabled``, ``value``, ``hit`` and ``actual_value``.
        test_id: integer test number placed in each hit's header line.
        f_write: writable text stream that receives the report.
    """
    # Iterate the hits directly instead of indexing through enumerate, so any
    # iterable (not only an indexable sequence) is accepted.
    for hit in watchpoint_hits:
        f_write.write("-----------------------------------------------------------\n")
        f_write.write("watchpoint_hit for test_%u attributes:" % test_id + "\n")
        f_write.write("name = " + hit.name + "\n")
        f_write.write("slot = " + str(hit.slot) + "\n")
        f_write.write("condition = " + str(hit.condition) + "\n")
        f_write.write("watchpoint_id = " + str(hit.watchpoint_id) + "\n")
        for p, param in enumerate(hit.parameters):
            label = "parameter " + str(p)
            f_write.write(label + " name = " + param.name + "\n")
            f_write.write(label + " disabled = " + str(param.disabled) + "\n")
            f_write.write(label + " value = " + str(param.value) + "\n")
            f_write.write(label + " hit = " + str(param.hit) + "\n")
            f_write.write(label + " actual_value = " + str(param.actual_value) + "\n")
        f_write.write("error code = " + str(hit.error_code) + "\n")
        f_write.write("device_id = " + str(hit.device_id) + "\n")
        f_write.write("root_graph_id = " + str(hit.root_graph_id) + "\n")

View File

@ -1,89 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_trans_true_read_tensors"
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_sync_trans_true_read_tensors():
    """Read three tensors from the sync_trans_true AlexNet dump and check the report."""
    debugger_backend = d.DbgServices(
        dump_file_path="/home/workspace/mindspore_dataset/dumps/sync_trans_true/alexnet/")
    debugger_backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # (node_name, slot, is_parameter) for every tensor that is requested
    requests = (
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias", 0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171", 0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353", 1, False),
    )
    tensor_info = [
        d.TensorInfo(node_name=node, slot=slot, iteration=2, device_id=0,
                     root_graph_id=0, is_parameter=is_param)
        for node, slot, is_param in requests
    ]
    tensor_data = debugger_backend.read_tensors(tensor_info)
    print_read_tensors(tensor_info, tensor_data)
    if GENERATE_GOLDEN:
        return
    assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Write the attributes of the read tensors to the golden or actual file.

    The report goes to ``<test_name>.expected`` when ``GENERATE_GOLDEN`` is
    set and to ``<test_name>.actual`` otherwise.

    Args:
        tensor_info: sequence of objects exposing ``node_name``, ``slot``,
            ``iteration``, ``device_id``, ``root_graph_id`` and ``is_parameter``.
        tensor_data: sequence, index-aligned with ``tensor_info``, of objects
            exposing ``data_ptr``, ``data_size``, ``dtype`` and ``shape``.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # The context manager closes the report even if a write or the buffer
    # conversion below raises.
    with open(test_name + suffix, "w") as f_write:
        for x, info in enumerate(tensor_info):
            # Indexed on purpose: a shorter tensor_data must still raise IndexError.
            data = tensor_data[x]
            f_write.write("-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")

View File

@ -0,0 +1,204 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import os
import json
import tempfile
import numpy as np
import mindspore.offline_debug.dbg_services as d
import pytest
from tests.security_utils import security_off_wrap
from dump_test_utils import build_dump_structure
GENERATE_GOLDEN = False
watchpoint_hits_json = []
def run_watchpoints(is_sync):
"""Build a temporary dump from three in-memory tensors and exercise the watchpoint APIs.

Four scenarios run in order against the same backend: a MIN_LT watchpoint that
is hit, the same watchpoint after removal, a MIN_LT watchpoint that is not hit,
and a CHANGE_TOO_LARGE watchpoint that is hit. Hits from scenarios 1 and 4 are
either stored as golden data (GENERATE_GOLDEN) or compared with the golden file.

Args:
is_sync: selects the golden-file prefix ("sync_watchpoints" when true,
"async_watchpoints" otherwise).
"""
if is_sync:
test_name = "sync_watchpoints"
else:
test_name = "async_watchpoints"
# Tensor 1: input of the conv1 Conv2D node at iteration 2, target of the MIN_LT watchpoints.
name1 = "Conv2D.Conv2D-op369.0.0.1"
tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
[-1.3931e-02, 8.9359e-04, -1.1520e-02],
[-6.3248e-03, 1.8749e-03, 1.0132e-02]],
[[-2.5520e-03, -6.0005e-03, -5.1918e-03],
[-2.7866e-03, 2.5487e-04, 8.4782e-04],
[-4.6310e-03, -8.9111e-03, -8.1778e-05]],
[[1.3914e-03, 6.0844e-04, 1.0643e-03],
[-2.0966e-02, -1.2865e-03, -1.8692e-03],
[-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
# Tensors 2 and 3: the same fc2.bias node at iterations 2 and 3, so the
# CHANGE_TOO_LARGE watchpoint has two consecutive values to compare.
name2 = "Parameter.fc2.bias.0.0.2"
tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
2.1177532e-07, 2.9952851e-06], np.float32)
info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
"Parameter[6]_11/fc2.bias",
slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
-5.1546101e-07, 6.0798648e-06], np.float32)
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
"Parameter[6]_11/fc2.bias",
slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)
tensor_info = [info1, info2, info3]
# name2 is listed twice on purpose: info2 and info3 share a file-name prefix.
tensor_name = [name1, name2, name2]
tensor_list = [tensor1, tensor2, tensor3]
pwd = os.getcwd()
with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir:
temp_dir = build_dump_structure(tmp_dir, tensor_name, tensor_list, "Test", tensor_info)
debugger_backend = d.DbgServices(dump_file_path=temp_dir)
# NOTE(review): is_sync_mode is hard-coded to False, so is_sync only changes
# test_name above -- confirm whether is_sync should be forwarded here.
debugger_backend.initialize(net_name="Test", is_sync_mode=False)
# NOTES:
# -> watch_condition=6 is MIN_LT
# -> watch_condition=18 is CHANGE_TOO_LARGE
# test 1: watchpoint set and hit (watch_condition=6)
param1 = d.Parameter(name="param", disabled=False, value=0.0)
debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/"
"conv1-Conv2d/Conv2D-op369":
{"rank_id": [0], "root_graph_id": [0], "is_output": False
}}, parameter_list=[param1])
watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
assert len(watchpoint_hits_test_1) == 1
# Golden index 0: is_print=False only accumulates; the file is written after test 4.
if GENERATE_GOLDEN:
print_watchpoint_hits(watchpoint_hits_test_1, 0, False, test_name)
else:
compare_expect_actual_result(watchpoint_hits_test_1, 0, test_name)
# test 2: watchpoint remove and ensure it's not hit
debugger_backend.remove_watchpoint(watchpoint_id=1)
watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
assert not watchpoint_hits_test_2
# test 3: watchpoint set and not hit, then remove
param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/"
"conv1-Conv2d/Conv2D-op369":
{"rank_id": [0], "root_graph_id": [0], "is_output": False
}}, parameter_list=[param2])
watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
assert not watchpoint_hits_test_3
_ = debugger_backend.remove_watchpoint(watchpoint_id=2)
# test 4: weight change watchpoint set and hit
param_abs_mean_update_ratio_gt = d.Parameter(
name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
"Parameter[6]_11/fc2.bias":
{"rank_id": [0], "root_graph_id": [0], "is_output": True
}}, parameter_list=[param_abs_mean_update_ratio_gt,
param_epsilon])
watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
assert len(watchpoint_hits_test_4) == 1
# Golden index 1: is_print=True writes the accumulated hits to the golden file.
if GENERATE_GOLDEN:
print_watchpoint_hits(watchpoint_hits_test_4, 1, True, test_name)
else:
compare_expect_actual_result(watchpoint_hits_test_4, 1, test_name)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_sync_watchpoints():
    """Run the shared watchpoint scenario with ``is_sync`` set to True."""
    run_watchpoints(is_sync=True)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_watchpoints():
    """Run the shared watchpoint scenario with ``is_sync`` set to False."""
    run_watchpoints(is_sync=False)
def compare_expect_actual_result(watchpoint_hits_list, test_index, test_name):
    """Assert that watchpoint hits match the entries stored in the golden JSON file.

    The golden file is ``<cwd>/golden/<test_name>_expected.json``. Hit number
    ``x`` is compared with list entry ``test_index + x`` under the key
    ``watchpoint_hit<test_index + x + 1>``.

    Args:
        watchpoint_hits_list: iterable of hit objects exposing ``name``,
            ``slot``, ``condition``, ``watchpoint_id``, ``error_code``,
            ``rank_id``, ``root_graph_id`` and ``parameters``.
        test_index: offset of the first hit within the golden list.
        test_name: prefix of the golden file name.

    Raises:
        AssertionError: if any attribute, parameter field or the number of
            parameters differs from the golden entry.
    """
    golden_file = os.path.realpath(
        os.path.join(os.getcwd(), "golden", test_name + "_expected.json"))
    with open(golden_file, encoding="utf-8") as f:
        expected_list = json.load(f)
    for x, hit in enumerate(watchpoint_hits_list):
        test_id = "watchpoint_hit" + str(test_index + x + 1)
        info = expected_list[x + test_index][test_id]
        assert hit.name == info['name']
        assert hit.slot == info['slot']
        assert hit.condition == info['condition']
        assert hit.watchpoint_id == info['watchpoint_id']
        assert hit.error_code == info['error_code']
        assert hit.rank_id == info['rank_id']
        assert hit.root_graph_id == info['root_graph_id']
        actual_params = list(hit.parameters)
        expected_params = info['parameter']
        # Without this check a hit reporting fewer parameters than the golden
        # entry would pass without those parameters ever being compared.
        assert len(actual_params) == len(expected_params)
        for p, param in enumerate(actual_params):
            expected = expected_params[p]["parameter" + str(p)]
            assert param.name == expected['name']
            assert param.disabled == expected['disabled']
            assert param.value == expected['value']
            assert param.hit == expected['hit']
            assert param.actual_value == expected['actual_value']
def print_watchpoint_hits(watchpoint_hits_list, test_index, is_print, test_name):
    """Append watchpoint hits to ``watchpoint_hits_json`` and optionally dump them.

    Each hit becomes one entry keyed ``watchpoint_hit<k>`` with ``k`` starting
    at ``test_index + 1``; its parameters are stored as a list of single-key
    dicts ``parameter<p>``.

    Args:
        watchpoint_hits_list: iterable of hit objects exposing ``name``,
            ``slot``, ``condition``, ``watchpoint_id``, ``parameters``,
            ``error_code``, ``rank_id`` and ``root_graph_id``.
        test_index: offset added to the position of each hit to form its key.
        is_print: when true, write the accumulated module-level list to
            ``<test_name>_expected.json`` after the hits were appended.
        test_name: prefix of the output file name.
    """
    for x, hit in enumerate(watchpoint_hits_list):
        # Build the parameter list from the parameter objects themselves
        # rather than re-indexing hit.parameters on every field access.
        parameter_json = [
            {
                "parameter" + str(p): {
                    'name': param.name,
                    'disabled': param.disabled,
                    'value': param.value,
                    'hit': param.hit,
                    'actual_value': param.actual_value
                }
            }
            for p, param in enumerate(hit.parameters)
        ]
        watchpoint_hit = "watchpoint_hit" + str(test_index + x + 1)
        watchpoint_hits_json.append({
            watchpoint_hit: {
                'name': hit.name,
                'slot': hit.slot,
                'condition': hit.condition,
                'watchpoint_id': hit.watchpoint_id,
                'parameter': parameter_json,
                'error_code': hit.error_code,
                'rank_id': hit.rank_id,
                'root_graph_id': hit.root_graph_id
            }
        })
    if is_print:
        with open(test_name + "_expected.json", "w") as dump_f:
            json.dump(watchpoint_hits_json, dump_f, indent=4, separators=(',', ': '))