re-enabling offline debugger ut test
This commit is contained in:
parent
76a37daa43
commit
a9c6bf45a3
|
@ -1063,7 +1063,7 @@ class WatchpointHit():
|
|||
>>> name = watchpoint_hit.name
|
||||
"""
|
||||
|
||||
return self.instance.name()
|
||||
return self.instance.get_name()
|
||||
|
||||
@property
|
||||
def slot(self):
|
||||
|
@ -1086,7 +1086,7 @@ class WatchpointHit():
|
|||
>>> slot = watchpoint_hit.slot
|
||||
"""
|
||||
|
||||
return self.instance.slot()
|
||||
return self.instance.get_slot()
|
||||
|
||||
@property
|
||||
def condition(self):
|
||||
|
@ -1109,7 +1109,7 @@ class WatchpointHit():
|
|||
>>> condition = watchpoint_hit.condition
|
||||
"""
|
||||
|
||||
return self.instance.condition()
|
||||
return self.instance.get_condition()
|
||||
|
||||
@property
|
||||
def watchpoint_id(self):
|
||||
|
@ -1132,7 +1132,7 @@ class WatchpointHit():
|
|||
>>> watchpoint_id = watchpoint_hit.watchpoint_id
|
||||
"""
|
||||
|
||||
return self.instance.watchpoint_id()
|
||||
return self.instance.get_watchpoint_id()
|
||||
|
||||
@property
|
||||
def parameters(self):
|
||||
|
@ -1155,7 +1155,7 @@ class WatchpointHit():
|
|||
>>> parameters = watchpoint_hit.parameters
|
||||
"""
|
||||
|
||||
params = self.instance.parameters()
|
||||
params = self.instance.get_parameters()
|
||||
param_list = []
|
||||
for elem in params:
|
||||
tmp = Parameter(elem.get_name(),
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/CudnnUniformReal-op391
|
||||
slot = 0
|
||||
iteration = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = False
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 0 0 0 66 0 0 128 69]
|
||||
size in bytes = 8
|
||||
debugger dtype = 11
|
||||
shape = [2]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406
|
||||
slot = 1
|
||||
iteration = 1
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 0 0 0 0 0 0 0 66 0 0 128 69 0 0 144 64 195 245
|
||||
216 64 0 0 48 193]
|
||||
size in bytes = 24
|
||||
debugger dtype = 11
|
||||
shape = [2, 3]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424
|
||||
slot = 0
|
||||
iteration = 1
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = True
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 8 255 166 56 189 58 71 56 103 3 217 55 170 225 174 56 135 195
|
||||
82 56 54 253 225 55 254 158 179 56 33 66 88 56 30 248 222 55
|
||||
241 32 168 56 143 126 73 56 116 129 228 55 53 254 175 56 2 0
|
||||
87 56 246 124 238 55 177 160 180 56 156 126 92 56 144 121 236 55
|
||||
117 189 159 56 25 132 32 56 154 1 178 54 187 189 156 56 117 252
|
||||
27 56 205 2 76 54 212 127 148 56 129 1 12 56 53 253 11 182]
|
||||
size in bytes = 108
|
||||
debugger dtype = 11
|
||||
shape = [3, 3, 3]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_4 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381
|
||||
slot = 1
|
||||
iteration = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = True
|
||||
|
||||
tensor_data_4 attributes:
|
||||
data (printed in uint8) = [104 60 33 79 53 6 131 78 78 232 126 79 154 198 85 79 245 52
|
||||
84 78 70 207 222 78]
|
||||
size in bytes = 24
|
||||
debugger dtype = 11
|
||||
shape = [6]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_5 attributes:
|
||||
node name = Default/Reciprocal-op3
|
||||
slot = 0
|
||||
iteration = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = True
|
||||
|
||||
tensor_data_5 attributes:
|
||||
data (printed in uint8) = [ 0 0 128 63 0 0 128 255 0 0 128 127 0 0 128 255 0 0
|
||||
128 127 0 0 128 127 0 0 128 63 0 0 128 255 0 0 128 127
|
||||
0 0 128 127]
|
||||
size in bytes = 40
|
||||
debugger dtype = 11
|
||||
shape = [2, 5]
|
|
@ -0,0 +1,28 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/CudnnUniformReal-op390
|
||||
slot = 0
|
||||
iteration = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = False
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = []
|
||||
size in bytes = 0
|
||||
debugger dtype = 0
|
||||
shape = []
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406
|
||||
slot = 1
|
||||
iteration = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
is_output = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = []
|
||||
size in bytes = 0
|
||||
debugger dtype = 0
|
||||
shape = []
|
|
@ -1,70 +0,0 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 0 0 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 58 196 248
|
||||
194 127 0 0 17 0 0 0 0 0 0 0 160 76 6 140 195 127
|
||||
0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0
|
||||
64 195 195 248 194 127 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 88 1 196 248 194 127 0 0 18 0 0 0
|
||||
0 0 0 0 160 47 6 140 195 127 0 0 69 0 0 0 0 0
|
||||
0 0 1 0 0 0 195 127 0 0 176 203 195 248 194 127 0 0
|
||||
176 204 195 248 194 127 0 0 0 0 0 0 0 0 0 0 216 241
|
||||
195 248 194 127 0 0 19 0 0 0 0 0 0 0 96 39 6 140
|
||||
195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127
|
||||
0 0 112 52 196 248 194 127 0 0 176 52 196 248 194 127 0 0
|
||||
0 0 0 0 0 0 0 0 88 250 195 248 194 127 0 0 20 0
|
||||
0 0 0 0 0 0 128 130 5 140 195 127 0 0 69 0 0 0
|
||||
0 0 0 0 0 0 0 0 195 127 0 0 208 136 195 248 194 127
|
||||
0 0 176 202 195 248 194 127 0 0 48 52 196 248 194 127 0 0
|
||||
184 247 195 248 194 127 0 0 21 0 0 0 0 0 0 0 176 213
|
||||
4 140 195 127 0 0 69 0 0 0 0 0 0 0 0 0 0 0
|
||||
195 127 0 0 48 52 196 248 194 127 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 8 249 195 248 194 127 0 0
|
||||
22 0 0 0 0 0 0 0 16 46 4 140 195 127 0 0 69 0
|
||||
0 0 0 0 0 0 1 0 0 0 195 127 0 0 64 137 195 248
|
||||
194 127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 88 12 196 248 194 127 0 0 23 0 0 0 0 0 0 0
|
||||
32 137 3 140 195 127 0 0 85 0 0 0 0 0 0 0 0 0
|
||||
0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 104 246 195 248 194 127
|
||||
0 0 24 0 0 0 0 0 0 0 48 104 15 140 195 127 0 0
|
||||
32 104 15 140 195 127 0 0]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 0 0 0 ... 0 0 192]
|
||||
size in bytes = 1024
|
||||
debugger dtype = 11
|
||||
shape = [4, 4, 4, 4]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 0 169 0 ... 244 21 184]
|
||||
size in bytes = 1024
|
||||
debugger dtype = 8
|
||||
shape = [256]
|
|
@ -1,70 +0,0 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 186 117 65
|
||||
195 127 0 0 5 0 0 0 0 0 0 0 160 76 6 204 195 127
|
||||
0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0
|
||||
48 135 117 65 195 127 0 0 16 58 118 65 195 127 0 0 144 58
|
||||
118 65 195 127 0 0 168 186 117 65 195 127 0 0 6 0 0 0
|
||||
0 0 0 0 160 47 6 204 195 127 0 0 69 0 0 0 0 0
|
||||
0 0 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 184 249
|
||||
117 65 195 127 0 0 7 0 0 0 0 0 0 0 96 39 6 204
|
||||
195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127
|
||||
0 0 224 218 117 65 195 127 0 0 0 0 0 0 0 0 0 0
|
||||
224 219 117 65 195 127 0 0 200 17 118 65 195 127 0 0 8 0
|
||||
0 0 0 0 0 0 128 130 5 204 195 127 0 0 69 0 0 0
|
||||
0 0 0 0 1 0 0 0 195 127 0 0 120 233 255 59 196 127
|
||||
0 0 224 217 117 65 195 127 0 0 224 214 117 65 195 127 0 0
|
||||
120 250 117 65 195 127 0 0 9 0 0 0 0 0 0 0 176 213
|
||||
4 204 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0
|
||||
195 127 0 0 240 66 118 65 195 127 0 0 160 218 117 65 195 127
|
||||
0 0 224 215 117 65 195 127 0 0 40 9 118 65 195 127 0 0
|
||||
10 0 0 0 0 0 0 0 16 46 4 204 195 127 0 0 69 0
|
||||
0 0 0 0 0 0 1 0 0 0 195 127 0 0 208 59 118 65
|
||||
195 127 0 0 0 0 0 0 0 0 0 0 96 218 117 65 195 127
|
||||
0 0 56 251 117 65 195 127 0 0 11 0 0 0 0 0 0 0
|
||||
32 137 3 204 195 127 0 0 85 0 0 0 0 0 0 0 1 0
|
||||
0 0 195 127 0 0 224 214 117 65 195 127 0 0 144 59 118 65
|
||||
195 127 0 0 160 214 117 65 195 127 0 0 136 62 118 65 195 127
|
||||
0 0 12 0 0 0 0 0 0 0 48 104 15 204 195 127 0 0
|
||||
32 104 15 204 195 127 0 0]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [206 239 74 ... 76 157 184]
|
||||
size in bytes = 1024
|
||||
debugger dtype = 11
|
||||
shape = [4, 4, 4, 4]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [206 239 74 ... 76 157 184]
|
||||
size in bytes = 1024
|
||||
debugger dtype = 8
|
||||
shape = [256]
|
|
@ -1,14 +0,0 @@
|
|||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op318
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = []
|
||||
size in bytes = 0
|
||||
debugger dtype = 0
|
||||
shape = []
|
|
@ -1,20 +1,20 @@
|
|||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369
|
||||
slot = 1
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -2.0
|
||||
parameter 0 actual_value = -0.020966000854969025
|
||||
error code = 0
|
||||
device_id = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_4 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias
|
||||
slot = 0
|
||||
condition = 18
|
||||
watchpoint_id = 3
|
||||
|
@ -22,12 +22,12 @@ parameter 0 name = abs_mean_update_ratio_gt
|
|||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = 1.793662034335766e-35
|
||||
parameter 0 actual_value = 1.0156775705209766
|
||||
parameter 1 name = epsilon
|
||||
parameter 1 disabled = True
|
||||
parameter 1 value = 0.0
|
||||
parameter 1 hit = False
|
||||
parameter 1 actual_value = 0.0
|
||||
error code = 0
|
||||
device_id = 0
|
||||
rank_id = 0
|
||||
root_graph_id = 0
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -18,6 +18,8 @@ Utils for testing offline debugger.
|
|||
|
||||
import filecmp
|
||||
import os
|
||||
import tempfile
|
||||
import numpy as np
|
||||
|
||||
|
||||
def compare_actual_with_expected(test_name):
|
||||
|
@ -28,6 +30,23 @@ def compare_actual_with_expected(test_name):
|
|||
os.remove(test_name + ".actual")
|
||||
return is_eq
|
||||
|
||||
def skip_test():
|
||||
"""Skips the test."""
|
||||
return True
|
||||
def build_dump_structure(tensor_name_list, tensor_list, net_name, tensor_info_list):
|
||||
"""Build dump file structure from tensor_list."""
|
||||
temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./")
|
||||
for x, _ in enumerate(tensor_info_list):
|
||||
slot = str(tensor_info_list[x].slot)
|
||||
iteration = str(tensor_info_list[x].iteration)
|
||||
rank_id = str(tensor_info_list[x].rank_id)
|
||||
root_graph_id = str(tensor_info_list[x].root_graph_id)
|
||||
is_output = str(tensor_info_list[x].is_output)
|
||||
path = os.path.join(temp_dir, "rank_" + rank_id, net_name, root_graph_id, iteration)
|
||||
os.makedirs(path, exist_ok=True)
|
||||
if is_output == "True":
|
||||
file = tempfile.mkstemp(prefix=tensor_name_list[x], suffix=".output." + slot +
|
||||
".DefaultFormat.npy", dir=path)
|
||||
else:
|
||||
file = tempfile.mkstemp(prefix=tensor_name_list[x], suffix=".input." + slot +
|
||||
".DefaultFormat.npy", dir=path)
|
||||
full_path = file[1]
|
||||
np.save(full_path, tensor_list[x])
|
||||
return temp_dir
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import shutil
|
||||
import numpy as np
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
from dump_test_utils import compare_actual_with_expected, build_dump_structure
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_read_tensors"
|
||||
|
||||
|
||||
def test_sync_trans_false_read_tensors():
|
||||
|
||||
# input tensor with zero slot
|
||||
tensor1 = np.array([32.0, 4096.0], np.float32)
|
||||
name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
|
||||
info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
|
||||
# input tensor with non-zero slot
|
||||
tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
|
||||
name2 = "ReluGradV2.ReluGradV2-op406.0.0."
|
||||
info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
|
||||
slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
|
||||
# output tensor with zero slot
|
||||
tensor3 = np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
|
||||
[8.339e-05, 5.025e-05, 2.694e-05],
|
||||
[8.565e-05, 5.156e-05, 2.658e-05]],
|
||||
[[8.017e-05, 4.804e-05, 2.724e-05],
|
||||
[8.392e-05, 5.126e-05, 2.843e-05],
|
||||
[8.613e-05, 5.257e-05, 2.819e-05]],
|
||||
[[7.617e-05, 3.827e-05, 5.305e-06],
|
||||
[7.474e-05, 3.719e-05, 3.040e-06],
|
||||
[7.081e-05, 3.338e-05, -2.086e-06]]], np.float32)
|
||||
name3 = "Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0."
|
||||
info3 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
|
||||
"gradConv2D/Conv2DBackpropFilter-op424",
|
||||
slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
|
||||
# output tensor with non-zero slot
|
||||
tensor4 = np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32)
|
||||
name4 = "ReLUV2.ReLUV2-op381.0.0."
|
||||
info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
|
||||
slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
# inf tensor
|
||||
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
|
||||
inf_name = "Reciprocal.Reciprocal-op3.0.0."
|
||||
inf_info = d.TensorInfo(node_name="Default/Reciprocal-op3",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
tensor_name = [name1, name2, name3, name4]
|
||||
tensor_list = [tensor1, tensor2, tensor3, tensor4]
|
||||
tensor_info = [info1, info2, info3, info4]
|
||||
temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_info)
|
||||
inf_dir = build_dump_structure([inf_name], [inf_tensor], "Inf", [inf_info])
|
||||
|
||||
debugger_backend1 = d.DbgServices(dump_file_path=temp_dir)
|
||||
_ = debugger_backend1.initialize(net_name="alexnet", is_sync_mode=True)
|
||||
tensor_data = debugger_backend1.read_tensors(tensor_info)
|
||||
|
||||
debugger_backend2 = d.DbgServices(dump_file_path=inf_dir)
|
||||
_ = debugger_backend2.initialize(net_name="Inf", is_sync_mode=True)
|
||||
tensor_data_inf = debugger_backend2.read_tensors([inf_info])
|
||||
tensor_info.extend([inf_info])
|
||||
tensor_data.extend(tensor_data_inf)
|
||||
|
||||
shutil.rmtree(temp_dir)
|
||||
shutil.rmtree(inf_dir)
|
||||
print_read_tensors(tensor_info, tensor_data)
|
||||
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
|
||||
"""Print read tensors."""
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
|
||||
for x, _ in enumerate(tensor_info):
|
||||
f_write.write(
|
||||
"-----------------------------------------------------------\n")
|
||||
f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
|
||||
f_write.write("node name = " + tensor_info[x].node_name + "\n")
|
||||
f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
|
||||
f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
|
||||
f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
|
||||
f_write.write("root_graph_id = " +
|
||||
str(tensor_info[x].root_graph_id) + "\n")
|
||||
f_write.write("is_output = " +
|
||||
str(tensor_info[x].is_output) + "\n")
|
||||
f_write.write("\n")
|
||||
f_write.write("tensor_data_" + str(x+1) + " attributes:\n")
|
||||
f_write.write("data (printed in uint8) = " + str(np.frombuffer(
|
||||
tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
|
||||
py_byte_size = len(tensor_data[x].data_ptr)
|
||||
c_byte_size = tensor_data[x].data_size
|
||||
if c_byte_size != py_byte_size:
|
||||
f_write.write("The python byte size of " + str(py_byte_size) +
|
||||
" does not match the C++ byte size of " + str(c_byte_size) + "\n")
|
||||
f_write.write("size in bytes = " +
|
||||
str(tensor_data[x].data_size) + "\n")
|
||||
f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
|
||||
f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
|
||||
f_write.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_sync_trans_false_read_tensors()
|
|
@ -16,12 +16,10 @@
|
|||
Read tensor base and statistics test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import tempfile
|
||||
import os
|
||||
import shutil
|
||||
import numpy as np
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
from dump_test_utils import compare_actual_with_expected
|
||||
from dump_test_utils import compare_actual_with_expected, build_dump_structure
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_read_tensors_base_stat"
|
||||
|
@ -30,12 +28,27 @@ test_name = "sync_read_tensors_base_stat"
|
|||
def test_sync_read_tensors_base_stat():
|
||||
|
||||
value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32)
|
||||
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
|
||||
nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)
|
||||
name1 = "Add.Add-op4.0.0."
|
||||
info1 = d.TensorInfo(node_name="Default/Add-op4",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
value_path = build_dump_file_structure(value_tensor, "Add", "Add.Add-op4.0.0.")
|
||||
inf_path = build_dump_file_structure(inf_tensor, "Inf", "Reciprocal.Reciprocal-op3.0.0.")
|
||||
nan_path = build_dump_file_structure(nan_tensor, "Nan", "ReduceMean.ReduceMean-op92.0.0.")
|
||||
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
|
||||
name2 = "Reciprocal.Reciprocal-op3.0.0."
|
||||
info2 = d.TensorInfo(node_name="Default/Reciprocal-op3",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)
|
||||
name3 = "ReduceMean.ReduceMean-op92.0.0."
|
||||
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
tensor_info_1 = [info1]
|
||||
tensor_info_2 = [info2]
|
||||
tensor_info_3 = [info3]
|
||||
tensor_info = [info1, info2, info3]
|
||||
value_path = build_dump_structure([name1], [value_tensor], "Add", tensor_info_1)
|
||||
inf_path = build_dump_structure([name2], [inf_tensor], "Inf", tensor_info_2)
|
||||
nan_path = build_dump_structure([name3], [nan_tensor], "Nan", tensor_info_3)
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path=value_path, verbose=True)
|
||||
|
@ -55,19 +68,6 @@ def test_sync_read_tensors_base_stat():
|
|||
_ = debugger_backend_3.initialize(
|
||||
net_name="Nan", is_sync_mode=True)
|
||||
|
||||
info1 = d.TensorInfo(node_name="Default/Add-op4",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
info2 = d.TensorInfo(node_name="Default/Reciprocal-op3",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
|
||||
tensor_info_1 = [info1]
|
||||
tensor_info_2 = [info2]
|
||||
tensor_info_3 = [info3]
|
||||
tensor_info = [info1, info2, info3]
|
||||
|
||||
tensor_base_data_list = debugger_backend.read_tensor_base(tensor_info_1)
|
||||
tensor_base_data_list_2 = debugger_backend_2.read_tensor_base(tensor_info_2)
|
||||
tensor_base_data_list.extend(tensor_base_data_list_2)
|
||||
|
@ -84,21 +84,10 @@ def test_sync_read_tensors_base_stat():
|
|||
shutil.rmtree(inf_path)
|
||||
shutil.rmtree(nan_path)
|
||||
print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list)
|
||||
assert compare_actual_with_expected(test_name)
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def build_dump_file_structure(tensor_array, net_name, tensor_name):
|
||||
debugger_temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./")
|
||||
print(debugger_temp_dir)
|
||||
path = os.path.join(debugger_temp_dir, "rank_0", net_name, "0", "0")
|
||||
print(path)
|
||||
os.makedirs(path, exist_ok=True)
|
||||
file = tempfile.mkstemp(prefix=tensor_name, suffix=".output.0.DefaultFormat.npy", dir=path)
|
||||
full_path = file[1]
|
||||
np.save(full_path, tensor_array)
|
||||
|
||||
return debugger_temp_dir
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list):
|
||||
"""Print read tensors info."""
|
||||
if GENERATE_GOLDEN:
|
||||
|
|
|
@ -16,39 +16,54 @@
|
|||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import shutil
|
||||
import numpy as np
|
||||
from dump_test_utils import compare_actual_with_expected, skip_test
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
from dump_test_utils import compare_actual_with_expected, build_dump_structure
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_true_read_tensors_nonexist_node"
|
||||
test_name = "sync_read_tensors_nonexist_node"
|
||||
|
||||
|
||||
def test_sync_trans_read_tensors_nonexist_node():
|
||||
|
||||
if skip_test():
|
||||
return
|
||||
tensor1 = np.array([32.0, 4096.0], np.float32)
|
||||
name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
|
||||
info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
|
||||
tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
|
||||
name2 = "ReluGradV2.ReluGradV2-op406.0.0."
|
||||
info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
|
||||
slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
|
||||
# non-existing tensor with wrong op name
|
||||
info3 = d.TensorInfo(node_name="Default/CudnnUniformReal-op390",
|
||||
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="../data/dump/gpu_dumps/sync_trans_true/alexnet")
|
||||
# non-existing tensor with wrong iteration number
|
||||
info4 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
|
||||
slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
|
||||
|
||||
tensor_name = [name1, name2]
|
||||
tensor_create_info = [info1, info2]
|
||||
tensor_list = [tensor1, tensor2]
|
||||
temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_create_info)
|
||||
tensor_check_info = [info3, info4]
|
||||
|
||||
debugger_backend = d.DbgServices(dump_file_path=temp_dir)
|
||||
|
||||
_ = debugger_backend.initialize(
|
||||
net_name="Network Name goes here!", is_sync_mode=True)
|
||||
net_name="alexnet", is_sync_mode=True)
|
||||
|
||||
# non-existing tensor with wrong op name
|
||||
info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op318",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
|
||||
tensor_info = [info1]
|
||||
|
||||
tensor_data = debugger_backend.read_tensors(tensor_info)
|
||||
tensor_data = debugger_backend.read_tensors(tensor_check_info)
|
||||
|
||||
# Check the length of tensor list
|
||||
assert len(tensor_info) == 1
|
||||
assert len(tensor_data) == 1
|
||||
assert len(tensor_check_info) == 2
|
||||
assert len(tensor_data) == 2
|
||||
|
||||
print_read_tensors(tensor_info, tensor_data)
|
||||
assert compare_actual_with_expected(test_name)
|
||||
print_read_tensors(tensor_check_info, tensor_data)
|
||||
shutil.rmtree(temp_dir)
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
|
||||
|
@ -65,11 +80,11 @@ def print_read_tensors(tensor_info, tensor_data):
|
|||
f_write.write("node name = " + tensor_info[x].node_name + "\n")
|
||||
f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
|
||||
f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
|
||||
f_write.write("device_id = " + str(tensor_info[x].device_id) + "\n")
|
||||
f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
|
||||
f_write.write("root_graph_id = " +
|
||||
str(tensor_info[x].root_graph_id) + "\n")
|
||||
f_write.write("is_parameter = " +
|
||||
str(tensor_info[x].is_parameter) + "\n")
|
||||
f_write.write("is_output = " +
|
||||
str(tensor_info[x].is_output) + "\n")
|
||||
f_write.write("\n")
|
||||
f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
|
||||
f_write.write("data (printed in uint8) = " + str(np.frombuffer(
|
|
@ -1,92 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
from dump_test_utils import compare_actual_with_expected, skip_test
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_false_read_tensors"
|
||||
|
||||
|
||||
def test_sync_trans_false_read_tensors():
|
||||
|
||||
if skip_test():
|
||||
return
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")
|
||||
|
||||
_ = debugger_backend.initialize(
|
||||
net_name="alexnet", is_sync_mode=True)
|
||||
|
||||
# parameter
|
||||
info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
|
||||
# output tensor with zero slot
|
||||
info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
|
||||
slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
# output tensor with non-zero slot
|
||||
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
|
||||
slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
|
||||
|
||||
tensor_info = [info1, info2, info3]
|
||||
|
||||
tensor_data = debugger_backend.read_tensors(tensor_info)
|
||||
|
||||
print_read_tensors(tensor_info, tensor_data)
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print read tensors.

    Writes one attribute section per requested tensor to the golden
    (``.expected``) or actual (``.actual``) file — selected by the
    module-level ``GENERATE_GOLDEN`` flag — so the test can compare
    actual output against the expected output.

    Args:
        tensor_info: sequence of TensorInfo objects that were requested.
        tensor_data: sequence of tensor-data objects returned by
            read_tensors, aligned index-by-index with ``tensor_info``.
    """
    file_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # Use a context manager so the file is closed even if an attribute
    # access or np.frombuffer below raises; the original open()/close()
    # pair leaked the handle on any such error.
    with open(file_name, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
            f_write.write("node name = " + tensor_info[x].node_name + "\n")
            f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
            f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
            f_write.write("device_id = " + str(tensor_info[x].device_id) + "\n")
            f_write.write("root_graph_id = " +
                          str(tensor_info[x].root_graph_id) + "\n")
            f_write.write("is_parameter = " +
                          str(tensor_info[x].is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x+1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
            # Sanity check: the Python-side buffer length should agree with
            # the byte size reported by the C++ backend.
            py_byte_size = len(tensor_data[x].data_ptr)
            c_byte_size = tensor_data[x].data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " +
                          str(tensor_data[x].data_size) + "\n")
            f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
            f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
|
||||
|
||||
|
||||
# Allow running this test directly as a standalone script.
if __name__ == "__main__":
    test_sync_trans_false_read_tensors()
|
|
@ -1,92 +0,0 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
from dump_test_utils import compare_actual_with_expected, skip_test
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_true_read_tensors"
|
||||
|
||||
|
||||
def test_sync_trans_read_tensors():
    """Read three tensors from a sync-trans-true GPU dump and diff against the golden file.

    Covers a parameter tensor, a zero-slot output tensor and a
    non-zero-slot output tensor via the offline debugger read_tensors API.
    """
    if skip_test():
        return

    backend = d.DbgServices(
        dump_file_path="../data/dump/gpu_dumps/sync_trans_true/alexnet")
    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    tensor_info = [
        # parameter
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                     slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                     slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]

    tensor_data = backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
    assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print read tensors.

    Writes one attribute section per requested tensor to the golden
    (``.expected``) or actual (``.actual``) file — selected by the
    module-level ``GENERATE_GOLDEN`` flag — so the test can compare
    actual output against the expected output.

    Args:
        tensor_info: sequence of TensorInfo objects that were requested.
        tensor_data: sequence of tensor-data objects returned by
            read_tensors, aligned index-by-index with ``tensor_info``.
    """
    file_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # Use a context manager so the file is closed even if an attribute
    # access or np.frombuffer below raises; the original open()/close()
    # pair leaked the handle on any such error.
    with open(file_name, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
            f_write.write("node name = " + tensor_info[x].node_name + "\n")
            f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
            f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
            f_write.write("device_id = " + str(tensor_info[x].device_id) + "\n")
            f_write.write("root_graph_id = " +
                          str(tensor_info[x].root_graph_id) + "\n")
            f_write.write("is_parameter = " +
                          str(tensor_info[x].is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x+1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
            # Sanity check: the Python-side buffer length should agree with
            # the byte size reported by the C++ backend.
            py_byte_size = len(tensor_data[x].data_ptr)
            c_byte_size = tensor_data[x].data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " +
                          str(tensor_data[x].data_size) + "\n")
            f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
            f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
|
||||
|
||||
|
||||
# Allow running this test directly as a standalone script.
if __name__ == "__main__":
    test_sync_trans_read_tensors()
|
|
@ -16,39 +16,71 @@
|
|||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import shutil
|
||||
import numpy as np
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
from dump_test_utils import compare_actual_with_expected, skip_test
|
||||
from dump_test_utils import compare_actual_with_expected, build_dump_structure
|
||||
|
||||
GENERATE_GOLDEN = False
|
||||
test_name = "sync_trans_false_watchpoints"
|
||||
test_name = "sync_watchpoints"
|
||||
|
||||
|
||||
def test_sync_trans_false_watchpoints():
|
||||
|
||||
if skip_test():
|
||||
return
|
||||
|
||||
if GENERATE_GOLDEN:
|
||||
f_write = open(test_name + ".expected", "w")
|
||||
else:
|
||||
f_write = open(test_name + ".actual", "w")
|
||||
|
||||
debugger_backend = d.DbgServices(
|
||||
dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")
|
||||
name1 = "Conv2D.Conv2D-op369.0.0."
|
||||
tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
|
||||
[-1.3931e-02, 8.9359e-04, -1.1520e-02],
|
||||
[-6.3248e-03, 1.8749e-03, 1.0132e-02]],
|
||||
[[-2.5520e-03, -6.0005e-03, -5.1918e-03],
|
||||
[-2.7866e-03, 2.5487e-04, 8.4782e-04],
|
||||
[-4.6310e-03, -8.9111e-03, -8.1778e-05]],
|
||||
[[1.3914e-03, 6.0844e-04, 1.0643e-03],
|
||||
[-2.0966e-02, -1.2865e-03, -1.8692e-03],
|
||||
[-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
|
||||
info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
|
||||
slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
|
||||
|
||||
_ = debugger_backend.initialize(
|
||||
net_name="Alexnet", is_sync_mode=True)
|
||||
name2 = "Parameter.fc2.bias.0.0."
|
||||
tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
|
||||
2.1177532e-07, 2.9952851e-06], np.float32)
|
||||
info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
|
||||
slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
|
||||
-5.1546101e-07, 6.0798648e-06], np.float32)
|
||||
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
|
||||
slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
name3 = "Parameter.fc3.bias.0.0."
|
||||
tensor4 = np.array([2.2930422e-04, -3.6369250e-04, 7.1337068e-04, -1.9567949e-05], np.float32)
|
||||
info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias",
|
||||
slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
|
||||
|
||||
tensor_info = [info1, info2, info3, info4]
|
||||
tensor_name = [name1, name2, name2, name3]
|
||||
tensor_list = [tensor1, tensor2, tensor3, tensor4]
|
||||
|
||||
temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_info)
|
||||
|
||||
debugger_backend = d.DbgServices(dump_file_path=temp_dir)
|
||||
|
||||
_ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=True)
|
||||
# NOTES:
|
||||
# -> watch_condition=6 is MIN_LT
|
||||
# -> watch_condition=18 is CHANGE_TOO_LARGE
|
||||
# -> watch_condition=20 is NOT_CHANGE
|
||||
|
||||
# test 1: watchpoint set and hit (watch_condition=6)
|
||||
param1 = d.Parameter(name="param", disabled=False, value=0.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
|
||||
"Conv2D-op308":
|
||||
{"device_id": [0], "root_graph_id": [0], "is_parameter": False
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/"
|
||||
"Conv2D-op369":
|
||||
{"rank_id": [0], "root_graph_id": [0], "is_output": False
|
||||
}}, parameter_list=[param1])
|
||||
|
||||
watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
|
||||
|
@ -66,9 +98,9 @@ def test_sync_trans_false_watchpoints():
|
|||
# test 3: watchpoint set and not hit, then remove
|
||||
param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
|
||||
"Conv2D-op308":
|
||||
{"device_id": [0], "root_graph_id": [0], "is_parameter": False
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/"
|
||||
"Conv2D-op369":
|
||||
{"rank_id": [0], "root_graph_id": [0], "is_output": False
|
||||
}}, parameter_list=[param2])
|
||||
|
||||
watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
|
||||
|
@ -83,18 +115,19 @@ def test_sync_trans_false_watchpoints():
|
|||
param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
|
||||
_ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
|
||||
check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
|
||||
"Parameter[6]_11/fc3.bias":
|
||||
{"device_id": [0], "root_graph_id": [0], "is_parameter": True
|
||||
"Parameter[6]_11/fc2.bias":
|
||||
{"rank_id": [0], "root_graph_id": [0], "is_output": True
|
||||
}}, parameter_list=[param_abs_mean_update_ratio_gt,
|
||||
param_epsilon])
|
||||
|
||||
watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
|
||||
if len(watchpoint_hits_test_4) != 1:
|
||||
f_write.write(
|
||||
"ERROR -> test 4: watchpoint weight change set but not hit just once\n")
|
||||
f_write.write("ERROR -> test 4: watchpoint weight change set but not hit just once\n")
|
||||
print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)
|
||||
f_write.close()
|
||||
assert compare_actual_with_expected(test_name)
|
||||
shutil.rmtree(temp_dir)
|
||||
if not GENERATE_GOLDEN:
|
||||
assert compare_actual_with_expected(test_name)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
|
||||
|
@ -104,7 +137,7 @@ def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
|
|||
"-----------------------------------------------------------\n")
|
||||
f_write.write("watchpoint_hit for test_%u attributes:" %
|
||||
test_id + "\n")
|
||||
f_write.write("name = " + str(watchpoint_hits[x].name) + "\n")
|
||||
f_write.write("name = " + watchpoint_hits[x].name + "\n")
|
||||
f_write.write("slot = " + str(watchpoint_hits[x].slot) + "\n")
|
||||
f_write.write("condition = " +
|
||||
str(watchpoint_hits[x].condition) + "\n")
|
||||
|
@ -123,8 +156,8 @@ def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
|
|||
str(watchpoint_hits[x].parameters[p].actual_value) + "\n")
|
||||
f_write.write("error code = " +
|
||||
str(watchpoint_hits[x].error_code) + "\n")
|
||||
f_write.write("device_id = " +
|
||||
str(watchpoint_hits[x].device_id) + "\n")
|
||||
f_write.write("rank_id = " +
|
||||
str(watchpoint_hits[x].rank_id) + "\n")
|
||||
f_write.write("root_graph_id = " +
|
||||
str(watchpoint_hits[x].root_graph_id) + "\n")
|
||||
|
Loading…
Reference in New Issue