forked from mindspore-Ecosystem/mindspore
move the debugger python api to mindinsight package
This commit is contained in:
parent
835e2e546e
commit
e7ed27ae92
|
@ -1,92 +0,0 @@
|
||||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
"""DebuggerTensor."""
|
|
||||||
from abc import ABC
|
|
||||||
|
|
||||||
|
|
||||||
class DebuggerTensor(ABC):
|
|
||||||
"""
|
|
||||||
The tensor with specific rank, iteration and debugging info.
|
|
||||||
|
|
||||||
Note:
|
|
||||||
- Users should not instantiate this class manually.
|
|
||||||
- The instances of this class is immutable.
|
|
||||||
- A DebuggerTensor is always the output tensor of a node.
|
|
||||||
"""
|
|
||||||
@property
|
|
||||||
def node(self):
|
|
||||||
"""
|
|
||||||
Get the node that outputs this tensor.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Node, the node that outputs this tensor.
|
|
||||||
"""
|
|
||||||
return None
|
|
||||||
|
|
||||||
@property
|
|
||||||
def name(self):
|
|
||||||
"""
|
|
||||||
Get the name of this tensor.
|
|
||||||
|
|
||||||
The name is composed of full name of a node and the slot number.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str, the name of this tensor.
|
|
||||||
"""
|
|
||||||
return ""
|
|
||||||
|
|
||||||
@property
|
|
||||||
def slot(self):
|
|
||||||
"""
|
|
||||||
Get slot.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int, the slot of the tensor on the node.
|
|
||||||
"""
|
|
||||||
return -1
|
|
||||||
|
|
||||||
@property
|
|
||||||
def iteration(self):
|
|
||||||
"""
|
|
||||||
Get the iteration for this tensor.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int, the iteration for this tensor.
|
|
||||||
"""
|
|
||||||
return -1
|
|
||||||
|
|
||||||
@property
|
|
||||||
def rank(self):
|
|
||||||
"""
|
|
||||||
Get the rank for this tensor.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
int, the rank for this tensor.
|
|
||||||
|
|
||||||
"""
|
|
||||||
return -1
|
|
||||||
|
|
||||||
def get_value(self):
|
|
||||||
"""
|
|
||||||
Get the value of the tensor.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
numpy.ndarray, the value of the debugger tensor.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def get_affected_nodes(self):
|
|
||||||
"""
|
|
||||||
Get the nodes that use current tensor as input.
|
|
||||||
"""
|
|
|
@ -1,138 +0,0 @@
|
||||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
"""Debugger python API."""
|
|
||||||
|
|
||||||
from typing import Iterable
|
|
||||||
|
|
||||||
from mindspore.offline_debug.debugger_tensor import DebuggerTensor
|
|
||||||
from mindspore.offline_debug.node import Node
|
|
||||||
from mindspore.offline_debug.watchpoints import WatchpointBase, WatchpointHit
|
|
||||||
|
|
||||||
|
|
||||||
class DumpAnalyzer:
|
|
||||||
"""
|
|
||||||
Analyzer to inspect the dump data.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
summary_dir (str): The path of the summary directory which contains
|
|
||||||
dump folder.
|
|
||||||
mem_limit (int, optional): The memory limit for this debugger session in
|
|
||||||
MB. Default: None, which means no limit.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, summary_dir, mem_limit=None):
|
|
||||||
self._summary_dir = summary_dir
|
|
||||||
self._mem_limit = mem_limit
|
|
||||||
|
|
||||||
def export_graphs(self, output_dir=None):
|
|
||||||
"""
|
|
||||||
Export the computational graph(s) in xlsx file(s) to the output_dir.
|
|
||||||
|
|
||||||
The file(s) will contain the stack info of graph nodes.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
output_dir (str, optional): Output directory to save the file.
|
|
||||||
Default: None, which means to use the current working directory.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str. The path of the generated file.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def select_nodes(
|
|
||||||
self,
|
|
||||||
query_string,
|
|
||||||
use_regex=False,
|
|
||||||
match_target="name",
|
|
||||||
case_sensitive=True) -> Iterable[Node]:
|
|
||||||
"""
|
|
||||||
Select nodes.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query_string (str): Query string. For a node to be selected, the
|
|
||||||
match target field must contains or matches the query string.
|
|
||||||
use_regex (bool): Indicates whether query is a regex. Default: False.
|
|
||||||
match_target (str, optional): The field to search when selecting
|
|
||||||
nodes. Available values are "name", "stack".
|
|
||||||
"name" means to search the name of the nodes in the
|
|
||||||
graph. "stack" means the stack info of
|
|
||||||
the node. Default: "name".
|
|
||||||
case_sensitive (bool, optional): Whether case-sensitive when
|
|
||||||
selecting tensors. Default: True.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[Node], the matched nodes.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def select_tensors(
|
|
||||||
self,
|
|
||||||
query_string,
|
|
||||||
use_regex=False,
|
|
||||||
match_target="name",
|
|
||||||
iterations=None,
|
|
||||||
ranks=None,
|
|
||||||
slots=None,
|
|
||||||
case_sensitive=True) -> Iterable[DebuggerTensor]:
|
|
||||||
"""
|
|
||||||
Select tensors.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query_string (str): Query string. For a tensor to be selected, the
|
|
||||||
match target field must contains or matches the query string.
|
|
||||||
use_regex (bool): Indicates whether query is a regex. Default: False.
|
|
||||||
match_target (str, optional): The field to search when selecting
|
|
||||||
tensors. Available values are "name", "stack".
|
|
||||||
"name" means to search the name of the tensors in the
|
|
||||||
graph. "name" is composed of graph node's full_name
|
|
||||||
and the tensor's slot number. "stack" means the stack info of
|
|
||||||
the node that outputs this tensor. Default: "name".
|
|
||||||
iterations (list[int], optional): The iterations to select. Default:
|
|
||||||
None, which means all iterations will be selected.
|
|
||||||
ranks (list(int], optional): The ranks to select. Default: None,
|
|
||||||
which means all ranks will be selected.
|
|
||||||
slots (list[int], optional): The slot of the selected tensor.
|
|
||||||
Default: None, which means all slots will be selected.
|
|
||||||
case_sensitive (bool, optional): Whether case-sensitive when
|
|
||||||
selecting tensors. Default: True.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[DebuggerTensor], the matched tensors.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def get_iterations(self) -> Iterable[int]:
|
|
||||||
"""Get the available iterations this run."""
|
|
||||||
|
|
||||||
def get_ranks(self) -> Iterable[int]:
|
|
||||||
"""Get the available ranks in this run."""
|
|
||||||
|
|
||||||
def check_watchpoints(
|
|
||||||
self,
|
|
||||||
watchpoints: Iterable[WatchpointBase]) -> Iterable[WatchpointHit]:
|
|
||||||
"""
|
|
||||||
Check the given watch points on specified nodes(if available) on the
|
|
||||||
given iterations(if available) in a batch.
|
|
||||||
|
|
||||||
Note:
|
|
||||||
For speed, all watchpoints for the iteration should be given at
|
|
||||||
the same time to avoid reading tensors len(watchpoints) times.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
watchpoints (Iterable[WatchpointBase]): The list of watchpoints.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[WatchpointHit], the watchpoint hist list is carefully
|
|
||||||
sorted so that the user can see the most import hit on the
|
|
||||||
top of the list. When there are many many watchpoint hits,
|
|
||||||
we will display the list in a designed clear way.
|
|
||||||
"""
|
|
|
@ -1,75 +0,0 @@
|
||||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
"""Node in the computational graph."""
|
|
||||||
from abc import ABC
|
|
||||||
|
|
||||||
|
|
||||||
class Node(ABC):
|
|
||||||
"""Node in the computational graph."""
|
|
||||||
@property
|
|
||||||
def name(self):
|
|
||||||
"""
|
|
||||||
Get the full name of this node.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str, the full name of the node.
|
|
||||||
"""
|
|
||||||
return ""
|
|
||||||
|
|
||||||
@property
|
|
||||||
def stack(self):
|
|
||||||
"""Get stack info."""
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_input_tensors(
|
|
||||||
self,
|
|
||||||
iterations=None,
|
|
||||||
ranks=None,
|
|
||||||
slots=None):
|
|
||||||
"""
|
|
||||||
Get the input tensors of the node.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[DebuggerTensor], the input tensors of the node.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def get_output_tensors(
|
|
||||||
self,
|
|
||||||
iterations=None,
|
|
||||||
ranks=None,
|
|
||||||
slots=None):
|
|
||||||
"""
|
|
||||||
Get the output tensors of this node.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[DebuggerTensor], the output tensors of the node.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def get_input_nodes(self):
|
|
||||||
"""
|
|
||||||
Get the input nodes of this node.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[Node], the input nodes of this node.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def get_output_nodes(self):
|
|
||||||
"""
|
|
||||||
Get the nodes that use the output tensors of this node.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Iterable[Node], the output nodes of this node.
|
|
||||||
"""
|
|
|
@ -1,128 +0,0 @@
|
||||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
"""Watchpoints."""
|
|
||||||
from mindspore.offline_debug.debugger_tensor import DebuggerTensor
|
|
||||||
|
|
||||||
|
|
||||||
class WatchpointBase:
|
|
||||||
"""
|
|
||||||
Base class for watchpoints.
|
|
||||||
|
|
||||||
Note:
|
|
||||||
- The watchpoint is bounded with tensor names.
|
|
||||||
- If multiple checking items is specified for one watch point instance,
|
|
||||||
a tensor needs to trigger all of them to trigger the watchpoint.
|
|
||||||
"""
|
|
||||||
@property
|
|
||||||
def name(self):
|
|
||||||
"""Get the name for the watchpoint."""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def check(self):
|
|
||||||
"""
|
|
||||||
Check the watchpoint against the tensors.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
list[WatchpointHit], the hits of the watchpoint.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class WatchpointHit:
|
|
||||||
"""
|
|
||||||
Watchpoint hit.
|
|
||||||
|
|
||||||
Note:
|
|
||||||
- This class is not meant to be instantiated by user.
|
|
||||||
- The instances of this class is immutable.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
tensor (DebuggerTensor): The tensor which hits the watchpoint.
|
|
||||||
watchpoint (WatchpointBase): The WatchPointBase object initialized with
|
|
||||||
user setting value.
|
|
||||||
watchpoint_hit_detail (WatchpointBase): The WatchPointBase object
|
|
||||||
initialized with actual value of the Tensor.
|
|
||||||
error_code: The code describing error.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
tensor: DebuggerTensor,
|
|
||||||
watchpoint: WatchpointBase,
|
|
||||||
watchpoint_hit_detail: WatchpointBase,
|
|
||||||
error_code):
|
|
||||||
self._tensor = tensor
|
|
||||||
self._watchpoint = watchpoint
|
|
||||||
self._error_code = error_code
|
|
||||||
self._watchpoint_hit_detail = watchpoint_hit_detail
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
if self._error_code:
|
|
||||||
return f"Watchpoint {self._watchpoint.name} check failed " \
|
|
||||||
f"on tensor {self._tensor.name}. " \
|
|
||||||
f"Error detail: error detail."
|
|
||||||
|
|
||||||
return f"Watchpoint {self._watchpoint.name} triggered on " \
|
|
||||||
f"tensor {self._tensor.name}. " \
|
|
||||||
f"The setting for watchpoint is mean_gt=0.2, abs_mean_gt=0.3." \
|
|
||||||
f"The actual value of the tensor is " \
|
|
||||||
f"mean_gt=0.21, abs_mean_gt=0.35."
|
|
||||||
|
|
||||||
@property
|
|
||||||
def tensor(self) -> DebuggerTensor:
|
|
||||||
"""Get the tensor for this watchpoint hit."""
|
|
||||||
return self._tensor
|
|
||||||
|
|
||||||
def get_watchpoint(self):
|
|
||||||
"""Get the original watchpoint."""
|
|
||||||
return self._watchpoint
|
|
||||||
|
|
||||||
def get_hit_detail(self):
|
|
||||||
"""Get the actual values for the thresholds in the watchpoint."""
|
|
||||||
return self._watchpoint_hit_detail
|
|
||||||
|
|
||||||
|
|
||||||
class TensorTooLargeWatchpoint(WatchpointBase):
|
|
||||||
"""
|
|
||||||
Tensor too large watchpoint.
|
|
||||||
|
|
||||||
When all specified checking conditions were satisfied, this watchpoint would
|
|
||||||
be hit after a check.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
tensors (Iterable[DebuggerTensor]): The tensors to check.
|
|
||||||
abs_mean_gt (float, optional): The threshold for mean of the absolute
|
|
||||||
value of the tensor. When the actual value was greater than this
|
|
||||||
threshold, this checking condition would be satisfied.
|
|
||||||
max_gt (float, optional): The threshold for maximum of the tensor. When
|
|
||||||
the actual value was greater than this threshold, this checking
|
|
||||||
condition would be satisfied.
|
|
||||||
min_gt (float, optional): The threshold for minimum of the tensor. When
|
|
||||||
the actual value was greater than this threshold, this checking
|
|
||||||
condition would be satisfied.
|
|
||||||
mean_gt (float, optional): The threshold for mean of the tensor. When
|
|
||||||
the actual value was greater than this threshold, this checking
|
|
||||||
condition would be satisfied.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, tensors,
|
|
||||||
abs_mean_gt=None, max_gt=None, min_gt=None, mean_gt=None):
|
|
||||||
self._tensors = tensors
|
|
||||||
self._abs_mean_gt = abs_mean_gt
|
|
||||||
self._max_gt = max_gt
|
|
||||||
self._min_gt = min_gt
|
|
||||||
self._mean_gt = mean_gt
|
|
||||||
|
|
||||||
@property
|
|
||||||
def name(self):
|
|
||||||
return "TensorTooLarge"
|
|
|
@ -1,80 +0,0 @@
|
||||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
"""Test debug API."""
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from mindspore.offline_debug.dump_analyzer import DumpAnalyzer
|
|
||||||
from mindspore.offline_debug.watchpoints import TensorTooLargeWatchpoint
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="Feature under development.")
|
|
||||||
def test_export_graphs():
|
|
||||||
"""Test debug API."""
|
|
||||||
my_run = DumpAnalyzer(
|
|
||||||
summary_dir="/path/to/summary-dir1"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Export the info about computational graph. Should support multi graphs.
|
|
||||||
my_run.export_graphs()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="Feature under development.")
|
|
||||||
def test_select_tensors():
|
|
||||||
"""Test debug API."""
|
|
||||||
my_run = DumpAnalyzer(
|
|
||||||
summary_dir="/path/to/summary-dir2"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Find the interested tensors.
|
|
||||||
matched_tensors = my_run.select_tensors(".*conv1.*", use_regex=True)
|
|
||||||
assert matched_tensors == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="Feature under development.")
|
|
||||||
def test_check_watchpoints_all_iterations():
|
|
||||||
"""Test debug API."""
|
|
||||||
my_run = DumpAnalyzer(
|
|
||||||
summary_dir="/path/to/summary-dir3"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Checking all the iterations.
|
|
||||||
watchpoints = [
|
|
||||||
TensorTooLargeWatchpoint(
|
|
||||||
tensors=my_run.select_tensors(
|
|
||||||
"(*.weight^)|(*.bias^)", use_regex=True),
|
|
||||||
abs_mean_gt=0.1)
|
|
||||||
]
|
|
||||||
|
|
||||||
watch_point_hits = my_run.check_watchpoints(watchpoints=watchpoints)
|
|
||||||
assert watch_point_hits == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="Feature under development.")
|
|
||||||
def test_check_watchpoints_one_iteration():
|
|
||||||
"""Test debug API."""
|
|
||||||
my_run = DumpAnalyzer(
|
|
||||||
summary_dir="/path/to/summary-dir4"
|
|
||||||
)
|
|
||||||
# Checking specific iteration.
|
|
||||||
watchpoints = [
|
|
||||||
TensorTooLargeWatchpoint(
|
|
||||||
tensors=my_run.select_tensors(
|
|
||||||
"(*.weight^)|(*.bias^)", use_regex=True,
|
|
||||||
iterations=[1]),
|
|
||||||
abs_mean_gt=0.1)
|
|
||||||
]
|
|
||||||
|
|
||||||
watch_point_hits = my_run.check_watchpoints(watchpoints=watchpoints)
|
|
||||||
assert watch_point_hits == []
|
|
Loading…
Reference in New Issue