!22638 Add ut test for offline debugger

Merge pull request !22638 from sabrinasun_59ee/ut
This commit is contained in:
i-robot 2021-09-15 06:28:47 +00:00 committed by Gitee
commit 0d914dd427
18 changed files with 1340 additions and 735 deletions

View File

@ -255,7 +255,7 @@ void DebugServices::AddAnalyzedTensorToCache(const bool recheck, const unsigned
// add analyzed tensor to cache // add analyzed tensor to cache
if (!recheck) { if (!recheck) {
wp_lock_.lock(); wp_lock_.lock();
wp_id_cache_[tensor_name].insert(id); (void)wp_id_cache_[tensor_name].insert(id);
wp_lock_.unlock(); wp_lock_.unlock();
} }
} }

View File

@ -43,7 +43,7 @@ class DbgLogger {
if (dbg_log_path != nullptr) { if (dbg_log_path != nullptr) {
char abspath[PATH_MAX]; char abspath[PATH_MAX];
if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) { if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) {
return; std::cout << "ERROR: DbgLogger could not create real path";
} }
FILE *fp = freopen(abspath, "a", stdout); FILE *fp = freopen(abspath, "a", stdout);
if (fp == nullptr) { if (fp == nullptr) {

View File

@ -0,0 +1,150 @@
[
{
"test1": {
"tensor_info": {
"node_name": "Default/Add-op4",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
]
},
"tensor_stat_info": {
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
],
"is_bool": false,
"max_vaue": 10.0,
"min_value": -11.0,
"avg_value": 0.880000114440918,
"count": 6,
"neg_zero_count": 2,
"pos_zero_count": 3,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0,
"zero_count": 1
}
}
},
{
"test2": {
"tensor_info": {
"node_name": "Default/Reciprocal-op3",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 40,
"debugger_dtype": 11,
"shape": [
2,
5
]
},
"tensor_stat_info": {
"size_in_bytes": 40,
"debugger_dtype": 11,
"shape": [
2,
5
],
"is_bool": false,
"max_vaue": 1.0,
"min_value": 1.0,
"avg_value": 1.0,
"count": 10,
"neg_zero_count": 0,
"pos_zero_count": 2,
"nan_count": 0,
"neg_inf_count": 3,
"pos_inf_count": 5,
"zero_count": 0
}
}
},
{
"test3": {
"tensor_info": {
"node_name": "Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 20,
"debugger_dtype": 11,
"shape": [
5
]
},
"tensor_stat_info": {
"size_in_bytes": 20,
"debugger_dtype": 11,
"shape": [
5
],
"is_bool": false,
"max_vaue": 1.9901361465454102,
"min_value": -2.175431728363037,
"avg_value": -0.6648297309875488,
"count": 5,
"neg_zero_count": 2,
"pos_zero_count": 1,
"nan_count": 2,
"neg_inf_count": 0,
"pos_inf_count": 0,
"zero_count": 0
}
}
},
{
"test4": {
"tensor_info": {
"node_name": "invalid_name_for_test",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_base_info": {
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
},
"tensor_stat_info": {
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": [],
"is_bool": false,
"max_vaue": -1.7976931348623157e+308,
"min_value": 1.7976931348623157e+308,
"avg_value": 0.0,
"count": 0,
"neg_zero_count": 0,
"pos_zero_count": 0,
"nan_count": 0,
"neg_inf_count": 0,
"pos_inf_count": 0,
"zero_count": 0
}
}
}
]

View File

@ -0,0 +1,315 @@
[
{
"tensor_1": {
"tensor_info": {
"node_name": "Default/CudnnUniformReal-op391",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
66,
0,
0,
128,
69
],
"size_in_bytes": 8,
"debugger_dtype": 11,
"shape": [
2
]
}
}
},
{
"tensor_2": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [
0,
0,
0,
0,
0,
0,
0,
66,
0,
0,
128,
69,
0,
0,
144,
64,
195,
245,
216,
64,
0,
0,
48,
193
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
2,
3
]
}
}
},
{
"tensor_3": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424",
"slot": 0,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
8,
255,
166,
56,
189,
58,
71,
56,
103,
3,
217,
55,
170,
225,
174,
56,
135,
195,
82,
56,
54,
253,
225,
55,
254,
158,
179,
56,
33,
66,
88,
56,
30,
248,
222,
55,
241,
32,
168,
56,
143,
126,
73,
56,
116,
129,
228,
55,
53,
254,
175,
56,
2,
0,
87,
56,
246,
124,
238,
55,
177,
160,
180,
56,
156,
126,
92,
56,
144,
121,
236,
55,
117,
189,
159,
56,
25,
132,
32,
56,
154,
1,
178,
54,
187,
189,
156,
56,
117,
252,
27,
56,
205,
2,
76,
54,
212,
127,
148,
56,
129,
1,
12,
56,
53,
253,
11,
182
],
"size_in_bytes": 108,
"debugger_dtype": 11,
"shape": [
3,
3,
3
]
}
}
},
{
"tensor_4": {
"tensor_info": {
"node_name": "Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
"slot": 1,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
104,
60,
33,
79,
53,
6,
131,
78,
78,
232,
126,
79,
154,
198,
85,
79,
245,
52,
84,
78,
70,
207,
222,
78
],
"size_in_bytes": 24,
"debugger_dtype": 11,
"shape": [
6
]
}
}
},
{
"tensor_5": {
"tensor_info": {
"node_name": "Default/Reciprocal-op3",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [
0,
0,
128,
63,
0,
0,
128,
255,
0,
0,
128,
127,
0,
0,
128,
255,
0,
0,
128,
127,
0,
0,
128,
127,
0,
0,
128,
63,
0,
0,
128,
255,
0,
0,
128,
127,
0,
0,
128,
127
],
"size_in_bytes": 40,
"debugger_dtype": 11,
"shape": [
2,
5
]
}
}
}
]

View File

@ -0,0 +1,56 @@
[
{
"tensor_1": {
"tensor_info": {
"node_name": "Default/CudnnUniformReal-op390",
"slot": 0,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [],
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
}
}
},
{
"tensor_2": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 0,
"rank_id": 0,
"root_graph_id": 0,
"is_output": false
},
"tensor_data": {
"data": [],
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
}
}
},
{
"tensor_3": {
"tensor_info": {
"node_name": "Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
"slot": 1,
"iteration": 1,
"rank_id": 0,
"root_graph_id": 0,
"is_output": true
},
"tensor_data": {
"data": [],
"size_in_bytes": 0,
"debugger_dtype": 0,
"shape": []
}
}
}
]

View File

@ -1,79 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/CudnnUniformReal-op391
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_1 attributes:
data (printed in uint8) = [ 0 0 0 66 0 0 128 69]
size in bytes = 8
debugger dtype = 11
shape = [2]
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406
slot = 1
iteration = 1
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_2 attributes:
data (printed in uint8) = [ 0 0 0 0 0 0 0 66 0 0 128 69 0 0 144 64 195 245
216 64 0 0 48 193]
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/gradConv2D/Conv2DBackpropFilter-op424
slot = 0
iteration = 1
rank_id = 0
root_graph_id = 0
is_output = True
tensor_data_3 attributes:
data (printed in uint8) = [ 8 255 166 56 189 58 71 56 103 3 217 55 170 225 174 56 135 195
82 56 54 253 225 55 254 158 179 56 33 66 88 56 30 248 222 55
241 32 168 56 143 126 73 56 116 129 228 55 53 254 175 56 2 0
87 56 246 124 238 55 177 160 180 56 156 126 92 56 144 121 236 55
117 189 159 56 25 132 32 56 154 1 178 54 187 189 156 56 117 252
27 56 205 2 76 54 212 127 148 56 129 1 12 56 53 253 11 182]
size in bytes = 108
debugger dtype = 11
shape = [3, 3, 3]
-----------------------------------------------------------
tensor_info_4 attributes:
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381
slot = 1
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_data_4 attributes:
data (printed in uint8) = [104 60 33 79 53 6 131 78 78 232 126 79 154 198 85 79 245 52
84 78 70 207 222 78]
size in bytes = 24
debugger dtype = 11
shape = [6]
-----------------------------------------------------------
tensor_info_5 attributes:
node name = Default/Reciprocal-op3
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_data_5 attributes:
data (printed in uint8) = [ 0 0 128 63 0 0 128 255 0 0 128 127 0 0 128 255 0 0
128 127 0 0 128 127 0 0 128 63 0 0 128 255 0 0 128 127
0 0 128 127]
size in bytes = 40
debugger dtype = 11
shape = [2, 5]

View File

@ -1,120 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/Add-op4
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
tensor_stat_info:
size in bytes = 24
debugger dtype = 11
shape = [2, 3]
is_bool = False
max_value = 10.0
min_value = -11.0
avg_value = 0.880000114440918
count = 6
neg_zero_count = 2
pos_zero_count = 3
nan_count = 0
neg_inf_count = 0
pos_inf_count = 0
zero_count = 1
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Default/Reciprocal-op3
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]
tensor_stat_info:
size in bytes = 40
debugger dtype = 11
shape = [2, 5]
is_bool = False
max_value = 1.0
min_value = 1.0
avg_value = 1.0
count = 10
neg_zero_count = 0
pos_zero_count = 2
nan_count = 0
neg_inf_count = 3
pos_inf_count = 5
zero_count = 0
-----------------------------------------------------------
tensor_info_3 attributes:
node name = Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 20
debugger dtype = 11
shape = [5]
tensor_stat_info:
size in bytes = 20
debugger dtype = 11
shape = [5]
is_bool = False
max_value = 1.9901361465454102
min_value = -2.175431728363037
avg_value = -0.6648297309875488
count = 5
neg_zero_count = 2
pos_zero_count = 1
nan_count = 2
neg_inf_count = 0
pos_inf_count = 0
zero_count = 0
-----------------------------------------------------------
tensor_info_4 attributes:
node name = invalid_name_for_test
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = True
tensor_base_info:
size in bytes = 0
debugger dtype = 0
shape = []
tensor_stat_info:
size in bytes = 0
debugger dtype = 0
shape = []
is_bool = False
max_value = -1.7976931348623157e+308
min_value = 1.7976931348623157e+308
avg_value = 0.0
count = 0
neg_zero_count = 0
pos_zero_count = 0
nan_count = 0
neg_inf_count = 0
pos_inf_count = 0
zero_count = 0

View File

@ -1,28 +0,0 @@
-----------------------------------------------------------
tensor_info_1 attributes:
node name = Default/CudnnUniformReal-op390
slot = 0
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_1 attributes:
data (printed in uint8) = []
size in bytes = 0
debugger dtype = 0
shape = []
-----------------------------------------------------------
tensor_info_2 attributes:
node name = Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406
slot = 1
iteration = 0
rank_id = 0
root_graph_id = 0
is_output = False
tensor_data_2 attributes:
data (printed in uint8) = []
size in bytes = 0
debugger dtype = 0
shape = []

View File

@ -1,33 +0,0 @@
-----------------------------------------------------------
watchpoint_hit for test_1 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369
slot = 1
condition = 6
watchpoint_id = 1
parameter 0 name = param
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = -0.020966000854969025
error code = 0
rank_id = 0
root_graph_id = 0
-----------------------------------------------------------
watchpoint_hit for test_4 attributes:
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias
slot = 0
condition = 18
watchpoint_id = 3
parameter 0 name = abs_mean_update_ratio_gt
parameter 0 disabled = False
parameter 0 value = 0.0
parameter 0 hit = True
parameter 0 actual_value = 1.0156775705209766
parameter 1 name = epsilon
parameter 1 disabled = True
parameter 1 value = 0.0
parameter 1 hit = False
parameter 1 actual_value = 0.0
error code = 0
rank_id = 0
root_graph_id = 0

View File

@ -0,0 +1,77 @@
[
{
"watchpoint_hit1": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
"slot": 1,
"condition": 6,
"watchpoint_id": 1,
"paremeter": [
{
"parameter0": {
"name": "param",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": -0.020966000854969025
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
},
{
"watchpoint_hit2": {
"name": "Default/CudnnUniformReal-op391",
"slot": 0,
"condition": 6,
"watchpoint_id": 2,
"paremeter": [
{
"parameter0": {
"name": "param",
"disabled": false,
"value": 10.0,
"hit": true,
"actual_value": -4096.0
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
},
{
"watchpoint_hit3": {
"name": "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
"slot": 0,
"condition": 18,
"watchpoint_id": 3,
"paremeter": [
{
"parameter0": {
"name": "abs_mean_update_ratio_gt",
"disabled": false,
"value": 0.0,
"hit": true,
"actual_value": 1.0156775705209766
}
},
{
"parameter1": {
"name": "epsilon",
"disabled": true,
"value": 0.0,
"hit": false,
"actual_value": 0.0
}
}
],
"error_code": 0,
"rank_id": 0,
"root_graph_id": 0
}
}
]

View File

@ -16,20 +16,11 @@
Utils for testing offline debugger. Utils for testing offline debugger.
""" """
import filecmp
import os import os
import tempfile import tempfile
import numpy as np import numpy as np
def compare_actual_with_expected(test_name):
"""Compare actual file with expected."""
is_eq = filecmp.cmp("../data/dump/gpu_dumps/golden/" +
test_name + ".expected", test_name + ".actual", shallow=False)
if os.path.exists(test_name + ".actual"):
os.remove(test_name + ".actual")
return is_eq
def build_dump_structure(tensor_name_list, tensor_list, net_name, tensor_info_list): def build_dump_structure(tensor_name_list, tensor_list, net_name, tensor_info_list):
"""Build dump file structure from tensor_list.""" """Build dump file structure from tensor_list."""
temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./") temp_dir = tempfile.mkdtemp(prefix=net_name, dir="./")

View File

@ -0,0 +1,180 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import os
import shutil
import json
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap
class TestOfflineReadTensor:
    """Test read_tensors for the offline debugger.

    setup_class builds two synthetic dump trees with build_dump_structure:
    a four-tensor net named "Test" and a one-tensor inf/-inf net named
    "Inf".  Each test reads the tensors back through dbg_services and
    compares the raw bytes against the golden file
    ../data/dump/gpu_dumps/golden/read_tensors_expected.json.
    """

    # When True, tests accumulate results into tensor_json and the final
    # sync test rewrites the golden file instead of comparing against it.
    GENERATE_GOLDEN = False
    test_name = "read_tensors"
    # Class-level accumulator: golden entries tensor_1..tensor_5 are
    # collected across the two sync tests and dumped as one JSON list.
    tensor_json = []
    temp_dir = ''

    @classmethod
    def setup_class(cls):
        """Init setup for offline read tensor test"""
        # input tensor with zero slot
        tensor1 = np.array([32.0, 4096.0], np.float32)
        name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
        info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                             slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        # input tensor with non-zero slot
        tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
        name2 = "ReluGradV2.ReluGradV2-op406.0.0."
        info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                             "gradReLU/ReluGradV2-op406",
                             slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
        # output tensor with zero slot
        tensor3 = np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
                             [8.339e-05, 5.025e-05, 2.694e-05],
                             [8.565e-05, 5.156e-05, 2.658e-05]],
                            [[8.017e-05, 4.804e-05, 2.724e-05],
                             [8.392e-05, 5.126e-05, 2.843e-05],
                             [8.613e-05, 5.257e-05, 2.819e-05]],
                            [[7.617e-05, 3.827e-05, 5.305e-06],
                             [7.474e-05, 3.719e-05, 3.040e-06],
                             [7.081e-05, 3.338e-05, -2.086e-06]]], np.float32)
        name3 = "Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0."
        info3 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
                             "gradConv2D/Conv2DBackpropFilter-op424",
                             slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
        # output tensor with non-zero slot (large integer bit patterns
        # stored as float32 on purpose; only raw bytes are compared)
        tensor4 = np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32)
        name4 = "ReLUV2.ReLUV2-op381.0.0."
        info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
                             slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
        tensor_name = [name1, name2, name3, name4]
        tensor_list = [tensor1, tensor2, tensor3, tensor4]
        cls.tensor_info = [info1, info2, info3, info4]
        cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", cls.tensor_info)
        # inf tensor: exercises +/-inf byte round-tripping in a separate dump
        inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf],
                               [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
        inf_name = "Reciprocal.Reciprocal-op3.0.0."
        cls.inf_info = d.TensorInfo(node_name="Default/Reciprocal-op3",
                                    slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
        cls.inf_dir = build_dump_structure([inf_name], [inf_tensor], "Inf", [cls.inf_info])

    @classmethod
    def teardown_class(cls):
        """Run after test this class."""
        shutil.rmtree(cls.temp_dir)
        shutil.rmtree(cls.inf_dir)

    @security_off_wrap
    def test_sync_read_tensors(self):
        """Read the four "Test" tensors in sync mode (golden entries 1-4)."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        tensor_data = debugger_backend.read_tensors(self.tensor_info)
        if self.GENERATE_GOLDEN:
            # Accumulate only; the inf sync test below flushes the file.
            self.print_read_tensors(self.tensor_info, tensor_data, 0, False)
        else:
            self.compare_expect_actual_result(self.tensor_info, tensor_data, 0)

    @security_off_wrap
    def test_sync_read_inf_tensors(self):
        """Read the "Inf" tensor in sync mode (golden entry 5)."""
        debugger_backend = d.DbgServices(dump_file_path=self.inf_dir)
        _ = debugger_backend.initialize(net_name="Inf", is_sync_mode=True)
        tensor_data_inf = debugger_backend.read_tensors([self.inf_info])
        if self.GENERATE_GOLDEN:
            # BUGFIX: this is the last golden-producing test, so is_print must
            # be True; with False (as before) the accumulated tensor_json was
            # never written and regeneration produced no golden file at all.
            self.print_read_tensors([self.inf_info], tensor_data_inf, 4, True)
        else:
            self.compare_expect_actual_result([self.inf_info], tensor_data_inf, 4)

    @security_off_wrap
    def test_async_read_tensors(self):
        """Read the four "Test" tensors in async mode; same golden entries 1-4."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
        tensor_data = debugger_backend.read_tensors(self.tensor_info)
        if not self.GENERATE_GOLDEN:
            self.compare_expect_actual_result(self.tensor_info, tensor_data, 0)

    @security_off_wrap
    def test_async_read_inf_tensors(self):
        """Read the "Inf" tensor in async mode; same golden entry 5."""
        debugger_backend = d.DbgServices(dump_file_path=self.inf_dir)
        _ = debugger_backend.initialize(net_name="Inf", is_sync_mode=False)
        tensor_data_inf = debugger_backend.read_tensors([self.inf_info])
        if not self.GENERATE_GOLDEN:
            self.compare_expect_actual_result([self.inf_info], tensor_data_inf, 4)

    def compare_expect_actual_result(self, tensor_info_list, tensor_data_list, test_index):
        """Compare actual result with golden file.

        test_index is the 0-based offset of the first expected entry, so
        entry keys are "tensor_<test_index+position+1>".
        """
        golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
                                                    self.test_name + "_expected.json"))
        with open(golden_file) as f:
            expected_list = json.load(f)
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            test_id = "tensor_" + str(test_index+x+1)
            info = expected_list[x+test_index][test_id]
            assert tensor_info.node_name == info['tensor_info']['node_name']
            assert tensor_info.slot == info['tensor_info']['slot']
            assert tensor_info.iteration == info['tensor_info']['iteration']
            assert tensor_info.rank_id == info['tensor_info']['rank_id']
            assert tensor_info.root_graph_id == info['tensor_info']['root_graph_id']
            assert tensor_info.is_output == info['tensor_info']['is_output']
            # Compare raw bytes, dtype-agnostic: golden "data" is a uint8 list.
            actual_data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            assert actual_data == info['tensor_data']['data']
            assert tensor_data.data_size == info['tensor_data']['size_in_bytes']
            assert tensor_data.dtype == info['tensor_data']['debugger_dtype']
            assert tensor_data.shape == info['tensor_data']['shape']

    def print_read_tensors(self, tensor_info_list, tensor_data_list, test_index, is_print):
        """Print read tensors result if GENERATE_GOLDEN is True.

        Appends one entry per tensor to cls.tensor_json; when is_print is
        True, dumps the whole accumulated list to <test_name>_expected.json.
        """
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            tensor = "tensor_" + str(test_index+x+1)
            data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            # Sanity check: C++-reported size must match the Python buffer.
            py_byte_size = len(tensor_data.data_ptr)
            c_byte_size = tensor_data.data_size
            if c_byte_size != py_byte_size:
                print("The python byte size of " + str(py_byte_size) +
                      " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            self.tensor_json.append({
                tensor: {
                    'tensor_info': {
                        'node_name': tensor_info.node_name,
                        'slot': tensor_info.slot,
                        'iteration': tensor_info.iteration,
                        'rank_id': tensor_info.rank_id,
                        'root_graph_id': tensor_info.root_graph_id,
                        'is_output': tensor_info.is_output
                    },
                    'tensor_data': {
                        'data': data,
                        'size_in_bytes': tensor_data.data_size,
                        'debugger_dtype': tensor_data.dtype,
                        'shape': tensor_data.shape
                    }
                }
            })
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -0,0 +1,164 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import os
import json
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap
class TestOfflineReadNonExistTensor:
    """Test reading non exist tensor for offline debugger

    Builds a two-tensor dump ("Test" net) and then queries tensors that do
    not exist (wrong op name, wrong iteration, wrong is_output).  Expected
    behaviour, pinned by the golden file, is one empty result per query:
    data == [], size_in_bytes == 0, debugger_dtype == 0, shape == [].
    """
    # When True, tests regenerate the golden JSON instead of comparing.
    GENERATE_GOLDEN = False
    test_name = "read_tensors_nonexist_node"
    # Accumulates golden entries tensor_1..tensor_3 across the three tests.
    tensor_json = []
    temp_dir = ''

    @classmethod
    def setup_class(cls):
        """Init setup for offline read tensor test"""
        tensor1 = np.array([32.0, 4096.0], np.float32)
        name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
        info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                             slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
        name2 = "ReluGradV2.ReluGradV2-op406.0.0."
        info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet"
                             "/gradReLU/ReluGradV2-op406",
                             slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
        tensor_name = [name1, name2]
        tensor_info = [info1, info2]
        tensor_list = [tensor1, tensor2]
        cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)

    @classmethod
    def teardown_class(cls):
        # Remove the temporary dump tree created in setup_class.
        shutil.rmtree(cls.temp_dir)

    @security_off_wrap
    def test_read_tensors_wrong_op_name(self):
        """Querying an op name absent from the dump yields one empty tensor."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(
            net_name="Test", is_sync_mode=True)
        # non-existing tensor with wrong op name
        # (op390 was never dumped; only op391 exists)
        info_nonexist = d.TensorInfo(node_name="Default/CudnnUniformReal-op390",
                                     slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        tensor_data = debugger_backend.read_tensors([info_nonexist])
        # Check the length of tensor data
        assert len(tensor_data) == 1
        if self.GENERATE_GOLDEN:
            # Accumulate only (golden entry 1); last test flushes the file.
            self.print_read_tensors([info_nonexist], tensor_data, 0, False)
        else:
            self.compare_expect_actual_result([info_nonexist], tensor_data, 0)

    @security_off_wrap
    def test_read_tensors_wrong_iteration(self):
        """Querying a valid node at an undumped iteration yields an empty tensor."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(
            net_name="Test", is_sync_mode=True)
        # non-existing tensor with wrong iteration number
        # (the ReluGradV2 tensor was dumped at iteration 1, not 0)
        info_nonexist = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                                     "gradReLU/ReluGradV2-op406",
                                     slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
        tensor_data = debugger_backend.read_tensors([info_nonexist])
        assert len(tensor_data) == 1
        if self.GENERATE_GOLDEN:
            self.print_read_tensors([info_nonexist], tensor_data, 1, True)
        else:
            self.compare_expect_actual_result([info_nonexist], tensor_data, 1)

    @security_off_wrap
    def test_read_tensors_wrong_is_output(self):
        """Querying an input-dumped tensor as output yields an empty tensor."""
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(
            net_name="Test", is_sync_mode=True)
        # non-existing tensor with wrong is_output
        # (the tensor was dumped with is_output=False)
        info_nonexist = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/"
                                     "gradReLU/ReluGradV2-op406",
                                     slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
        tensor_data = debugger_backend.read_tensors([info_nonexist])
        assert len(tensor_data) == 1
        if self.GENERATE_GOLDEN:
            self.print_read_tensors([info_nonexist], tensor_data, 2, True)
        else:
            self.compare_expect_actual_result([info_nonexist], tensor_data, 2)

    def compare_expect_actual_result(self, tensor_info_list, tensor_data_list, test_index):
        """Compare actual result with golden file.

        test_index is the 0-based offset of the first expected entry, so
        entry keys are "tensor_<test_index+position+1>".
        """
        golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
                                                    self.test_name + "_expected.json"))
        with open(golden_file) as f:
            expected_list = json.load(f)
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            tensor_id = "tensor_"+ str(test_index+x+1)
            info = expected_list[x+test_index][tensor_id]
            assert tensor_info.node_name == info['tensor_info']['node_name']
            assert tensor_info.slot == info['tensor_info']['slot']
            assert tensor_info.iteration == info['tensor_info']['iteration']
            assert tensor_info.rank_id == info['tensor_info']['rank_id']
            assert tensor_info.root_graph_id == info['tensor_info']['root_graph_id']
            assert tensor_info.is_output == info['tensor_info']['is_output']
            # Compare raw bytes, dtype-agnostic: golden "data" is a uint8 list.
            actual_data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            assert actual_data == info['tensor_data']['data']
            assert tensor_data.data_size == info['tensor_data']['size_in_bytes']
            assert tensor_data.dtype == info['tensor_data']['debugger_dtype']
            assert tensor_data.shape == info['tensor_data']['shape']

    def print_read_tensors(self, tensor_info_list, tensor_data_list, test_index, is_print):
        """Print read tensors result if GENERATE_GOLDEN is True.

        Appends one entry per tensor to cls.tensor_json; when is_print is
        True, dumps the whole accumulated list to <test_name>_expected.json.
        """
        for x, (tensor_info, tensor_data) in enumerate(zip(tensor_info_list, tensor_data_list)):
            tensor = "tensor_" + str(test_index+x+1)
            data = np.frombuffer(
                tensor_data.data_ptr, np.uint8, tensor_data.data_size).tolist()
            # Sanity check: C++-reported size must match the Python buffer.
            py_byte_size = len(tensor_data.data_ptr)
            c_byte_size = tensor_data.data_size
            if c_byte_size != py_byte_size:
                print("The python byte size of " + str(py_byte_size) +
                      " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            self.tensor_json.append({
                tensor: {
                    'tensor_info': {
                        'node_name': tensor_info.node_name,
                        'slot': tensor_info.slot,
                        'iteration': tensor_info.iteration,
                        'rank_id': tensor_info.rank_id,
                        'root_graph_id': tensor_info.root_graph_id,
                        'is_output': tensor_info.is_output
                    },
                    'tensor_data': {
                        'data': data,
                        'size_in_bytes': tensor_data.data_size,
                        'debugger_dtype': tensor_data.dtype,
                        'shape': tensor_data.shape
                    }
                }
            })
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -1,127 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors"
@security_off_wrap
def test_sync_trans_false_read_tensors():
# input tensor with zero slot
tensor1 = np.array([32.0, 4096.0], np.float32)
name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
# input tensor with non-zero slot
tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
name2 = "ReluGradV2.ReluGradV2-op406.0.0."
info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
# output tensor with zero slot
tensor3 = np.array([[[7.963e-05, 4.750e-05, 2.587e-05],
[8.339e-05, 5.025e-05, 2.694e-05],
[8.565e-05, 5.156e-05, 2.658e-05]],
[[8.017e-05, 4.804e-05, 2.724e-05],
[8.392e-05, 5.126e-05, 2.843e-05],
[8.613e-05, 5.257e-05, 2.819e-05]],
[[7.617e-05, 3.827e-05, 5.305e-06],
[7.474e-05, 3.719e-05, 3.040e-06],
[7.081e-05, 3.338e-05, -2.086e-06]]], np.float32)
name3 = "Conv2DBackpropFilter.Conv2DBackpropFilter-op424.0.0."
info3 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/conv5-Conv2d/"
"gradConv2D/Conv2DBackpropFilter-op424",
slot=0, iteration=1, rank_id=0, root_graph_id=0, is_output=True)
# output tensor with non-zero slot
tensor4 = np.array([2705090541, 1099111076, 4276637100, 3586562544, 890060077, 1869062900], np.float32)
name4 = "ReLUV2.ReLUV2-op381.0.0."
info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op381",
slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
# inf tensor
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
inf_name = "Reciprocal.Reciprocal-op3.0.0."
inf_info = d.TensorInfo(node_name="Default/Reciprocal-op3",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
tensor_name = [name1, name2, name3, name4]
tensor_list = [tensor1, tensor2, tensor3, tensor4]
tensor_info = [info1, info2, info3, info4]
temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_info)
inf_dir = build_dump_structure([inf_name], [inf_tensor], "Inf", [inf_info])
debugger_backend1 = d.DbgServices(dump_file_path=temp_dir)
_ = debugger_backend1.initialize(net_name="alexnet", is_sync_mode=True)
tensor_data = debugger_backend1.read_tensors(tensor_info)
debugger_backend2 = d.DbgServices(dump_file_path=inf_dir)
_ = debugger_backend2.initialize(net_name="Inf", is_sync_mode=True)
tensor_data_inf = debugger_backend2.read_tensors([inf_info])
tensor_info.extend([inf_info])
tensor_data.extend(tensor_data_inf)
shutil.rmtree(temp_dir)
shutil.rmtree(inf_dir)
print_read_tensors(tensor_info, tensor_data)
if not GENERATE_GOLDEN:
assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Dump the attributes of each read tensor to a golden/actual text file.

    Writes "<test_name>.expected" when GENERATE_GOLDEN is set, otherwise
    "<test_name>.actual" so the output can be diffed against the golden file.

    Args:
        tensor_info: list of d.TensorInfo describing the queried tensors.
        tensor_data: parallel list of tensor-data objects returned by
            DbgServices.read_tensors (exposes data_ptr/data_size/dtype/shape).
    """
    file_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # The with-block guarantees the file is closed even if an attribute access
    # or np.frombuffer raises; the original leaked the handle on exceptions.
    with open(file_name, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x+1) + " attributes:\n")
            f_write.write("node name = " + tensor_info[x].node_name + "\n")
            f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
            f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
            f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
            f_write.write("root_graph_id = " +
                          str(tensor_info[x].root_graph_id) + "\n")
            f_write.write("is_output = " +
                          str(tensor_info[x].is_output) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x+1) + " attributes:\n")
            # Raw bytes are rendered as uint8 so the dump format is dtype-agnostic.
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
            py_byte_size = len(tensor_data[x].data_ptr)
            c_byte_size = tensor_data[x].data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " +
                          str(tensor_data[x].data_size) + "\n")
            f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
            f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
# Allow running this test standalone, outside of pytest.
if __name__ == "__main__":
    test_sync_trans_false_read_tensors()

View File

@ -15,82 +15,178 @@
""" """
Read tensor base and statistics test script for offline debugger APIs. Read tensor base and statistics test script for offline debugger APIs.
""" """
import os
import shutil import shutil
import json
import numpy as np import numpy as np
import mindspore.offline_debug.dbg_services as d import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors_base_stat"
class TestOfflineReadTensorBaseStat:
"""Test read tensor base stat for offline debugger"""
GENERATE_GOLDEN = False
test_name = "read_tensors_base_stat"
tensor_json = []
test_path = ''
@security_off_wrap @classmethod
def test_sync_read_tensors_base_stat(): def setup_class(cls):
"""Init setup for offline read tensor test"""
value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32)
name1 = "Add.Add-op4.0.0."
info1 = d.TensorInfo(node_name="Default/Add-op4",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32) inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf],
name1 = "Add.Add-op4.0.0." [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32)
info1 = d.TensorInfo(node_name="Default/Add-op4", name2 = "Reciprocal.Reciprocal-op3.0.0."
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) info2 = d.TensorInfo(node_name="Default/Reciprocal-op3",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
inf_tensor = np.array([[1., -np.inf, np.inf, -np.inf, np.inf], [np.inf, 1., -np.inf, np.inf, np.inf]], np.float32) nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32)
name2 = "Reciprocal.Reciprocal-op3.0.0." name3 = "ReduceMean.ReduceMean-op92.0.0."
info2 = d.TensorInfo(node_name="Default/Reciprocal-op3", info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
nan_tensor = np.array([-2.1754317, 1.9901361, np.nan, np.nan, -1.8091936], np.float32) invalid_tensor = np.array([[1.1, -2.2], [3.3, -4.4]], np.float32)
name3 = "ReduceMean.ReduceMean-op92.0.0." name4 = "Add.Add-op1.0.0."
info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-MockModel/ReduceMean-op92", info4 = d.TensorInfo(node_name="invalid_name_for_test",
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True)
invalid_tensor = np.array([[1.1, -2.2], [3.3, -4.4]], np.float32) cls.tensor_info_1 = [info1]
name4 = "Add.Add-op1.0.0." cls.tensor_info_2 = [info2]
info4 = d.TensorInfo(node_name="invalid_name_for_test", cls.tensor_info_3 = [info3]
slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=True) cls.tensor_info_4 = [info4]
cls.tensor_info = [info1, info2, info3, info4]
cls.test_path = build_dump_structure([name1, name2, name3, name4],
[value_tensor, inf_tensor, nan_tensor, invalid_tensor],
"Test", cls.tensor_info)
cls.debugger_backend = d.DbgServices(dump_file_path=cls.test_path, verbose=True)
_ = cls.debugger_backend.initialize(net_name="Test", is_sync_mode=True)
tensor_info = [info1, info2, info3, info4] @classmethod
test_path = build_dump_structure([name1, name2, name3, name4], def teardown_class(cls):
[value_tensor, inf_tensor, nan_tensor, invalid_tensor], shutil.rmtree(cls.test_path)
"Test", tensor_info)
debugger_backend = d.DbgServices( @security_off_wrap
dump_file_path=test_path, verbose=True) def test_read_value_tensors_base_stat(self):
tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_1)
tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_1)
_ = debugger_backend.initialize( if self.GENERATE_GOLDEN:
net_name="Test", is_sync_mode=True) self.print_read_tensors(self.tensor_info_1, tensor_base_data_list, tensor_stat_data_list, 0, False)
else:
self.compare_expect_actual_result(self.tensor_info_1, tensor_base_data_list, tensor_stat_data_list, 0)
tensor_base_data_list = debugger_backend.read_tensor_base(tensor_info) @security_off_wrap
tensor_stat_data_list = debugger_backend.read_tensor_stats(tensor_info) def test_read_inf_tensors_base_stat(self):
shutil.rmtree(test_path) tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_2)
print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list) tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_2)
if not GENERATE_GOLDEN:
assert compare_actual_with_expected(test_name)
if self.GENERATE_GOLDEN:
self.print_read_tensors(self.tensor_info_2, tensor_base_data_list, tensor_stat_data_list, 1, False)
else:
self.compare_expect_actual_result(self.tensor_info_2, tensor_base_data_list, tensor_stat_data_list, 1)
def print_read_tensors(tensor_info, tensor_base_data_list, tensor_stat_data_list): @security_off_wrap
"""Print read tensors info.""" def test_read_nan_tensors_base_stat(self):
if GENERATE_GOLDEN: tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_3)
f_write = open(test_name + ".expected", "w") tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_3)
else:
f_write = open(test_name + ".actual", "w") if self.GENERATE_GOLDEN:
for x, (tensor_info_item, tensor_base, tensor_stat) in enumerate(zip(tensor_info, self.print_read_tensors(self.tensor_info_3, tensor_base_data_list, tensor_stat_data_list, 2, False)
tensor_base_data_list, else:
tensor_stat_data_list)): self.compare_expect_actual_result(self.tensor_info_3, tensor_base_data_list, tensor_stat_data_list, 2)
f_write.write(
"-----------------------------------------------------------\n") @security_off_wrap
f_write.write("tensor_info_" + str(x+1) + " attributes:\n") def test_read_inv_tensors_base_stat(self):
f_write.write("node name = " + tensor_info_item.node_name + "\n") tensor_base_data_list = self.debugger_backend.read_tensor_base(self.tensor_info_4)
f_write.write("slot = " + str(tensor_info_item.slot) + "\n") tensor_stat_data_list = self.debugger_backend.read_tensor_stats(self.tensor_info_4)
f_write.write("iteration = " + str(tensor_info_item.iteration) + "\n")
f_write.write("rank_id = " + str(tensor_info_item.rank_id) + "\n") if self.GENERATE_GOLDEN:
f_write.write("root_graph_id = " + self.print_read_tensors(self.tensor_info_4, tensor_base_data_list, tensor_stat_data_list, 3, True)
str(tensor_info_item.root_graph_id) + "\n") else:
f_write.write("is_output = " + self.compare_expect_actual_result(self.tensor_info_4, tensor_base_data_list, tensor_stat_data_list, 3)
str(tensor_info_item.is_output) + "\n")
f_write.write("\n") def compare_expect_actual_result(self, tensor_info, tensor_base_data_list, tensor_stat_data_list, test_index):
f_write.write("tensor_base_info:\n") """Compare actual result with golden file."""
f_write.write(str(tensor_base) + "\n") golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
f_write.write("\n") self.test_name + "_expected.json"))
f_write.write("tensor_stat_info:\n") with open(golden_file) as f:
f_write.write(str(tensor_stat) + '\n') expected_list = json.load(f)
f_write.close()
for x, (tensor_info_item, tensor_base, tensor_stat) in enumerate(zip(tensor_info,
tensor_base_data_list,
tensor_stat_data_list)):
test_id = "test"+ str(test_index+x+1)
info_json = expected_list[x+test_index][test_id]['tensor_info']
base_json = expected_list[x+test_index][test_id]['tensor_base_info']
stat_json = expected_list[x+test_index][test_id]['tensor_stat_info']
assert tensor_info_item.node_name == info_json['node_name']
assert tensor_info_item.slot == info_json['slot']
assert tensor_info_item.iteration == info_json['iteration']
assert tensor_info_item.rank_id == info_json['rank_id']
assert tensor_info_item.root_graph_id == info_json['root_graph_id']
assert tensor_info_item.is_output == info_json['is_output']
assert tensor_base.data_size == base_json['size_in_bytes']
assert tensor_base.dtype == base_json['debugger_dtype']
assert tensor_base.shape == base_json['shape']
assert tensor_stat.data_size == stat_json['size_in_bytes']
assert tensor_stat.dtype == stat_json['debugger_dtype']
assert tensor_stat.shape == stat_json['shape']
assert tensor_stat.is_bool == stat_json['is_bool']
assert tensor_stat.max_value == stat_json['max_vaue']
assert tensor_stat.min_value == stat_json['min_value']
assert tensor_stat.avg_value == stat_json['avg_value']
assert tensor_stat.count == stat_json['count']
assert tensor_stat.neg_zero_count == stat_json['neg_zero_count']
assert tensor_stat.pos_zero_count == stat_json['pos_zero_count']
assert tensor_stat.nan_count == stat_json['nan_count']
assert tensor_stat.neg_inf_count == stat_json['neg_inf_count']
assert tensor_stat.pos_inf_count == stat_json['pos_inf_count']
assert tensor_stat.zero_count == stat_json['zero_count']
    def print_read_tensors(self, tensor_info, tensor_base_data_list, tensor_stat_data_list, test_index, is_print):
        """Print read tensors info."""
        # Accumulates one "testN" record per tensor into self.tensor_json;
        # when is_print is True the accumulated list is dumped as the golden
        # "<test_name>_expected.json" file (indexing must match
        # compare_expect_actual_result).
        for x, (tensor_info_item, tensor_base, tensor_stat) in enumerate(zip(tensor_info,
                                                                             tensor_base_data_list,
                                                                             tensor_stat_data_list)):
            test_name = "test" + str(test_index+x+1)
            self.tensor_json.append({
                test_name: {
                    'tensor_info': {
                        'node_name': tensor_info_item.node_name,
                        'slot': tensor_info_item.slot,
                        'iteration': tensor_info_item.iteration,
                        'rank_id': tensor_info_item.rank_id,
                        'root_graph_id': tensor_info_item.root_graph_id,
                        'is_output': tensor_info_item.is_output
                    },
                    'tensor_base_info': {
                        'size_in_bytes': tensor_base.data_size,
                        'debugger_dtype': tensor_base.dtype,
                        'shape': tensor_base.shape
                    },
                    'tensor_stat_info': {
                        'size_in_bytes': tensor_stat.data_size,
                        'debugger_dtype': tensor_stat.dtype,
                        'shape': tensor_stat.shape,
                        'is_bool': tensor_stat.is_bool,
                        # 'max_vaue' (sic) matches the key stored in the golden
                        # JSON; keep the spelling in sync with the compare helper.
                        'max_vaue': tensor_stat.max_value,
                        'min_value': tensor_stat.min_value,
                        'avg_value': tensor_stat.avg_value,
                        'count': tensor_stat.count,
                        'neg_zero_count': tensor_stat.neg_zero_count,
                        'pos_zero_count': tensor_stat.pos_zero_count,
                        'nan_count': tensor_stat.nan_count,
                        'neg_inf_count': tensor_stat.neg_inf_count,
                        'pos_inf_count': tensor_stat.pos_inf_count,
                        'zero_count': tensor_stat.zero_count
                    }
                }})
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.tensor_json, dump_f, indent=4, separators=(',', ': '))

View File

@ -1,107 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Read tensor test script for offline debugger APIs.
"""
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
# Set to True to regenerate the golden ".expected" file instead of comparing.
GENERATE_GOLDEN = False
# Base name for the ".expected"/".actual" output files of this test.
test_name = "sync_read_tensors_nonexist_node"
@security_off_wrap
def test_sync_trans_read_tensors_nonexist_node():
    """Query tensors missing from the dump (wrong op name / wrong iteration)
    and check that read_tensors still returns one entry per query."""
    tensor1 = np.array([32.0, 4096.0], np.float32)
    name1 = "CudnnUniformReal.CudnnUniformReal-op391.0.0."
    info1 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    tensor2 = np.array([[0.0, 32.0, 4096.0], [4.5, 6.78, -11.0]], np.float32)
    name2 = "ReluGradV2.ReluGradV2-op406.0.0."
    info2 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
                         slot=1, iteration=1, rank_id=0, root_graph_id=0, is_output=False)
    # non-existing tensor with wrong op name
    info3 = d.TensorInfo(node_name="Default/CudnnUniformReal-op390",
                         slot=0, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    # non-existing tensor with wrong iteration number
    info4 = d.TensorInfo(node_name="Gradients/Default/network-WithLossCell/_backbone-AlexNet/gradReLU/ReluGradV2-op406",
                         slot=1, iteration=0, rank_id=0, root_graph_id=0, is_output=False)
    tensor_name = [name1, name2]
    tensor_create_info = [info1, info2]
    tensor_list = [tensor1, tensor2]
    # Build a dump tree containing only the two real tensors above.
    temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_create_info)
    # Read back only the two non-existing variants.
    tensor_check_info = [info3, info4]
    debugger_backend = d.DbgServices(dump_file_path=temp_dir)
    _ = debugger_backend.initialize(
        net_name="alexnet", is_sync_mode=True)
    tensor_data = debugger_backend.read_tensors(tensor_check_info)
    # Check the length of tensor list
    assert len(tensor_check_info) == 2
    assert len(tensor_data) == 2
    print_read_tensors(tensor_check_info, tensor_data)
    shutil.rmtree(temp_dir)
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Dump the attributes of each read tensor to a golden/actual text file.

    Writes "<test_name>.expected" when GENERATE_GOLDEN is set, otherwise
    "<test_name>.actual" so the output can be diffed against the golden file.

    Args:
        tensor_info: list of d.TensorInfo describing the queried tensors.
        tensor_data: parallel list of tensor-data objects returned by
            DbgServices.read_tensors (exposes data_ptr/data_size/dtype/shape).
    """
    file_name = test_name + (".expected" if GENERATE_GOLDEN else ".actual")
    # The with-block guarantees the file is closed even if an attribute access
    # or np.frombuffer raises; the original leaked the handle on exceptions.
    with open(file_name, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
            f_write.write("node name = " + tensor_info[x].node_name + "\n")
            f_write.write("slot = " + str(tensor_info[x].slot) + "\n")
            f_write.write("iteration = " + str(tensor_info[x].iteration) + "\n")
            f_write.write("rank_id = " + str(tensor_info[x].rank_id) + "\n")
            f_write.write("root_graph_id = " +
                          str(tensor_info[x].root_graph_id) + "\n")
            f_write.write("is_output = " +
                          str(tensor_info[x].is_output) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
            # Raw bytes are rendered as uint8 so the dump format is dtype-agnostic.
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                tensor_data[x].data_ptr, np.uint8, tensor_data[x].data_size)) + "\n")
            py_byte_size = len(tensor_data[x].data_ptr)
            c_byte_size = tensor_data[x].data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " +
                          str(tensor_data[x].data_size) + "\n")
            f_write.write("debugger dtype = " + str(tensor_data[x].dtype) + "\n")
            f_write.write("shape = " + str(tensor_data[x].shape) + "\n")
# Allow running this test standalone, outside of pytest.
if __name__ == "__main__":
    test_sync_trans_read_tensors_nonexist_node()

View File

@ -1,168 +0,0 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
# Set to True to regenerate the golden ".expected" file instead of comparing.
GENERATE_GOLDEN = False
# Base name for the ".expected"/".actual" output files of this test.
test_name = "sync_watchpoints"
@security_off_wrap
def test_sync_trans_false_watchpoints():
    """Run four add/check/remove watchpoint scenarios against a synthetic sync
    dump and record the hits for golden-file comparison."""
    # NOTE(review): f_write is not closed if an exception fires before the
    # close() call near the end — consider a with-block.
    if GENERATE_GOLDEN:
        f_write = open(test_name + ".expected", "w")
    else:
        f_write = open(test_name + ".actual", "w")
    name1 = "Conv2D.Conv2D-op369.0.0."
    tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
                         [-1.3931e-02, 8.9359e-04, -1.1520e-02],
                         [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
                        [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
                         [-2.7866e-03, 2.5487e-04, 8.4782e-04],
                         [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
                        [[1.3914e-03, 6.0844e-04, 1.0643e-03],
                         [-2.0966e-02, -1.2865e-03, -1.8692e-03],
                         [-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
                         slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
    # fc2.bias is dumped at iterations 2 and 3 so test 4 can compare the change.
    name2 = "Parameter.fc2.bias.0.0."
    tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                        2.1177532e-07, 2.9952851e-06], np.float32)
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
                         slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
    tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                        -5.1546101e-07, 6.0798648e-06], np.float32)
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias",
                         slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)
    name3 = "Parameter.fc3.bias.0.0."
    tensor4 = np.array([2.2930422e-04, -3.6369250e-04, 7.1337068e-04, -1.9567949e-05], np.float32)
    info4 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias",
                         slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
    tensor_info = [info1, info2, info3, info4]
    tensor_name = [name1, name2, name2, name3]
    tensor_list = [tensor1, tensor2, tensor3, tensor4]
    temp_dir = build_dump_structure(tensor_name, tensor_list, "alexnet", tensor_info)
    debugger_backend = d.DbgServices(dump_file_path=temp_dir)
    _ = debugger_backend.initialize(net_name="alexnet", is_sync_mode=True)
    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE
    # -> watch_condition=20 is NOT_CHANGE
    # test 1: watchpoint set and hit (watch_condition=6)
    param1 = d.Parameter(name="param", disabled=False, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/"
                                                         "Conv2D-op369":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                          }}, parameter_list=[param1])
    watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
    if len(watchpoint_hits_test_1) != 1:
        f_write.write(
            "ERROR -> test 1: watchpoint set but not hit just once\n")
    print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)
    # test 2: watchpoint remove and ensure it's not hit
    _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
    watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_2:
        f_write.write("ERROR -> test 2: watchpoint removed but hit\n")
    # test 3: watchpoint set and not hit, then remove
    param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/"
                                                         "Conv2D-op369":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                          }}, parameter_list=[param2])
    watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_3:
        f_write.write(
            "ERROR -> test 3: watchpoint set but not supposed to be hit\n")
    _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    # test 4: weight change watchpoint set and hit
    param_abs_mean_update_ratio_gt = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                        check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                         "Parameter[6]_11/fc2.bias":
                                                         {"rank_id": [0], "root_graph_id": [0], "is_output": True
                                                          }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                              param_epsilon])
    watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
    if len(watchpoint_hits_test_4) != 1:
        f_write.write("ERROR -> test 4: watchpoint weight change set but not hit just once\n")
    print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)
    f_write.close()
    shutil.rmtree(temp_dir)
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
    """Write the attributes of every watchpoint hit, including each of its
    parameters, to the given file object."""
    for hit in watchpoint_hits:
        f_write.write(
            "-----------------------------------------------------------\n")
        f_write.write("watchpoint_hit for test_%u attributes:" %
                      test_id + "\n")
        f_write.write("name = " + hit.name + "\n")
        f_write.write("slot = " + str(hit.slot) + "\n")
        f_write.write("condition = " + str(hit.condition) + "\n")
        f_write.write("watchpoint_id = " + str(hit.watchpoint_id) + "\n")
        for index, parameter in enumerate(hit.parameters):
            prefix = "parameter " + str(index) + " "
            f_write.write(prefix + "name = " + parameter.name + "\n")
            f_write.write(prefix + "disabled = " + str(parameter.disabled) + "\n")
            f_write.write(prefix + "value = " + str(parameter.value) + "\n")
            f_write.write(prefix + "hit = " + str(parameter.hit) + "\n")
            f_write.write(prefix + "actual_value = " + str(parameter.actual_value) + "\n")
        f_write.write("error code = " + str(hit.error_code) + "\n")
        f_write.write("rank_id = " + str(hit.rank_id) + "\n")
        f_write.write("root_graph_id = " + str(hit.root_graph_id) + "\n")
# Allow running this test standalone, outside of pytest.
if __name__ == "__main__":
    test_sync_trans_false_watchpoints()

View File

@ -0,0 +1,238 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
import os
import json
import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import build_dump_structure
from tests.security_utils import security_off_wrap
class TestOfflineWatchpoints:
    """Test watchpoint for offline debugger."""
    # Set to True to regenerate the golden "_expected.json" file.
    GENERATE_GOLDEN = False
    # Base name of the golden file (looked up under ../data/dump/gpu_dumps/golden/).
    test_name = "watchpoints"
    # Accumulates hit records across tests when generating the golden file.
    watchpoint_hits_json = []
    # Dump directory created in setup_class and removed in teardown_class.
    temp_dir = ''
    @classmethod
    def setup_class(cls):
        """Init setup for offline watchpoints test"""
        # Conv2D activation containing negative values, so the MIN_LT(0.0)
        # watchpoint used by the tests can hit on it.
        name1 = "Conv2D.Conv2D-op369.0.0.1"
        tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
                             [-1.3931e-02, 8.9359e-04, -1.1520e-02],
                             [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
                            [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
                             [-2.7866e-03, 2.5487e-04, 8.4782e-04],
                             [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
                            [[1.3914e-03, 6.0844e-04, 1.0643e-03],
                             [-2.0966e-02, -1.2865e-03, -1.8692e-03],
                             [-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
        info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369",
                             slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
        # fc2.bias dumped at iterations 2 and 3 so the weight-change test can
        # compare the two values.
        name2 = "Parameter.fc2.bias.0.0.2"
        tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                            2.1177532e-07, 2.9952851e-06], np.float32)
        info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                             "Parameter[6]_11/fc2.bias",
                             slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)
        tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                            -5.1546101e-07, 6.0798648e-06], np.float32)
        info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                             "Parameter[6]_11/fc2.bias",
                             slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)
        # All-negative tensor used by the second MIN_LT watchpoint in the tests.
        name3 = "CudnnUniformReal.CudnnUniformReal-op391.0.0.3"
        tensor4 = np.array([-32.0, -4096.0], np.float32)
        info4 = d.TensorInfo(node_name="Default/CudnnUniformReal-op391",
                             slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=False)
        tensor_info = [info1, info2, info3, info4]
        tensor_name = [name1, name2, name2, name3]
        tensor_list = [tensor1, tensor2, tensor3, tensor4]
        # Shared dump tree for every test of this class; removed in teardown_class.
        cls.temp_dir = build_dump_structure(tensor_name, tensor_list, "Test", tensor_info)
    @classmethod
    def teardown_class(cls):
        # Remove the synthetic dump directory created in setup_class.
        shutil.rmtree(cls.temp_dir)
    @security_off_wrap
    def test_sync_add_remove_watchpoints_hit(self):
        """Two MIN_LT watchpoints hit in order; after removing one, only one hit remains."""
        # NOTES: watch_condition=6 is MIN_LT
        # watchpoint set and hit (watch_condition=6), then remove it
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        param = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        # add second watchpoint to check the watchpoint hit in correct order
        param1 = d.Parameter(name="param", disabled=False, value=10.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/CudnnUniformReal-op391":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param1])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test) == 2
        if self.GENERATE_GOLDEN:
            self.print_watchpoint_hits(watchpoint_hits_test, 0, False)
        else:
            self.compare_expect_actual_result(watchpoint_hits_test, 0)
        # after removing watchpoint 1, only one hit is expected on re-check
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test_1) == 1
    @security_off_wrap
    def test_sync_add_remove_watchpoints_not_hit(self):
        """A MIN_LT watchpoint with threshold -1000.0 produces no hit; then remove it."""
        # watchpoint set and not hit(watch_condition=6), then remove
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        param = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    @security_off_wrap
    def test_sync_weight_change_watchpoints_hit(self):
        """A CHANGE_TOO_LARGE watchpoint on fc2.bias hits exactly once at iteration 3."""
        # NOTES: watch_condition=18 is CHANGE_TOO_LARGE
        # weight change watchpoint set and hit(watch_condition=18)
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=True)
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                             "Parameter[6]_11/fc2.bias":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": True
                                                              }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                                  param_epsilon])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=3)
        assert len(watchpoint_hits_test) == 1
        if self.GENERATE_GOLDEN:
            self.print_watchpoint_hits(watchpoint_hits_test, 2, True)
        else:
            self.compare_expect_actual_result(watchpoint_hits_test, 2)
    @security_off_wrap
    def test_async_add_remove_watchpoint_hit(self):
        """MIN_LT watchpoint hits once in async mode; no hit after removal."""
        # watchpoint set and hit(watch_condition=6) in async mode, then remove
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
        param = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test) == 1
        if not self.GENERATE_GOLDEN:
            # compares against golden entry 0, shared with the sync hit test
            self.compare_expect_actual_result(watchpoint_hits_test, 0)
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test_1
    @security_off_wrap
    def test_async_add_remove_watchpoints_not_hit(self):
        """A MIN_LT watchpoint with threshold -1000.0 produces no hit in async mode."""
        # watchpoint set and not hit(watch_condition=6) in async mode, then remove
        debugger_backend = d.DbgServices(dump_file_path=self.temp_dir)
        _ = debugger_backend.initialize(net_name="Test", is_sync_mode=False)
        param = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet"
                                                             "/conv1-Conv2d/Conv2D-op369":
                                                             {"rank_id": [0], "root_graph_id": [0], "is_output": False
                                                              }}, parameter_list=[param])
        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
    def compare_expect_actual_result(self, watchpoint_hits_list, test_index):
        """Compare actual result with golden file."""
        # test_index selects where this test's entries start inside the shared
        # golden JSON list (the goldens of all tests live in one file).
        golden_file = os.path.realpath(os.path.join("../data/dump/gpu_dumps/golden/",
                                                    self.test_name + "_expected.json"))
        with open(golden_file) as f:
            expected_list = json.load(f)
        for x, watchpoint_hits in enumerate(watchpoint_hits_list):
            test_id = "watchpoint_hit" + str(test_index+x+1)
            info = expected_list[x+test_index][test_id]
            assert watchpoint_hits.name == info['name']
            assert watchpoint_hits.slot == info['slot']
            assert watchpoint_hits.condition == info['condition']
            assert watchpoint_hits.watchpoint_id == info['watchpoint_id']
            assert watchpoint_hits.error_code == info['error_code']
            assert watchpoint_hits.rank_id == info['rank_id']
            assert watchpoint_hits.root_graph_id == info['root_graph_id']
            # 'paremeter' (sic) is the key written by print_watchpoint_hits into
            # the golden JSON — do not "fix" the spelling here without also
            # updating that method and regenerating the golden file.
            for p, _ in enumerate(watchpoint_hits.parameters):
                parameter = "parameter" + str(p)
                assert watchpoint_hits.parameters[p].name == info['paremeter'][p][parameter]['name']
                assert watchpoint_hits.parameters[p].disabled == info['paremeter'][p][parameter]['disabled']
                assert watchpoint_hits.parameters[p].value == info['paremeter'][p][parameter]['value']
                assert watchpoint_hits.parameters[p].hit == info['paremeter'][p][parameter]['hit']
                assert watchpoint_hits.parameters[p].actual_value == info['paremeter'][p][parameter]['actual_value']
    def print_watchpoint_hits(self, watchpoint_hits_list, test_index, is_print):
        """Print watchpoint hits."""
        # Appends one "watchpoint_hitN" record per hit to cls.watchpoint_hits_json;
        # when is_print is True the accumulated list is dumped as the golden file.
        for x, watchpoint_hits in enumerate(watchpoint_hits_list):
            parameter_json = []
            for p, _ in enumerate(watchpoint_hits.parameters):
                parameter = "parameter" + str(p)
                parameter_json.append({
                    parameter: {
                        'name': watchpoint_hits.parameters[p].name,
                        'disabled': watchpoint_hits.parameters[p].disabled,
                        'value': watchpoint_hits.parameters[p].value,
                        'hit': watchpoint_hits.parameters[p].hit,
                        'actual_value': watchpoint_hits.parameters[p].actual_value
                    }
                })
            watchpoint_hit = "watchpoint_hit" + str(test_index+x+1)
            self.watchpoint_hits_json.append({
                watchpoint_hit: {
                    'name': watchpoint_hits.name,
                    'slot': watchpoint_hits.slot,
                    'condition': watchpoint_hits.condition,
                    'watchpoint_id': watchpoint_hits.watchpoint_id,
                    # 'paremeter' (sic) — compare_expect_actual_result reads this
                    # exact key; keep both in sync if ever renamed.
                    'paremeter': parameter_json,
                    'error_code': watchpoint_hits.error_code,
                    'rank_id': watchpoint_hits.rank_id,
                    'root_graph_id': watchpoint_hits.root_graph_id
                }
            })
        if is_print:
            with open(self.test_name + "_expected.json", "w") as dump_f:
                json.dump(self.watchpoint_hits_json, dump_f, indent=4, separators=(',', ': '))