forked from mindspore-Ecosystem/mindspore
Add more logging when collecting the graph and using summary operators
Fix failure to collect input data when batch size is 1 and total step number is 1; fix spelling errors
This commit is contained in:
parent
7a7b455a57
commit
c6e4b0c85f
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -15,14 +15,24 @@
|
|||
|
||||
"""debug_ops"""
|
||||
from types import FunctionType, MethodType
|
||||
|
||||
from mindspore import context
|
||||
from ..._checkparam import Validator as validator
|
||||
from ..._checkparam import Rel
|
||||
from ...common import dtype as mstype
|
||||
from ..primitive import prim_attr_register, PrimitiveWithInfer
|
||||
|
||||
|
||||
def _check_mode(class_name):
|
||||
"""Check for PyNative mode."""
|
||||
mode = context.get_context('mode')
|
||||
if mode == context.PYNATIVE_MODE:
|
||||
raise RuntimeError(f'{class_name} operator does not support PyNative mode.')
|
||||
|
||||
|
||||
def _check_summary_param(name, value, class_name):
|
||||
"""Check that the name and value are valid for summary."""
|
||||
_check_mode(class_name)
|
||||
n_type = name['dtype']
|
||||
n_value = name['value']
|
||||
validator.check_value_type('name', n_type, [type(mstype.string)], class_name)
|
||||
|
|
|
@ -99,7 +99,9 @@ class SummaryCollector(Callback):
|
|||
- collect_eval_lineage (bool): Whether to collect lineage data for the evaluation phase,
|
||||
this field will be displayed on the lineage page of Mindinsight. Optional: True/False. Default: True.
|
||||
- collect_input_data (bool): Whether to collect dataset for each training.
|
||||
Currently only image data is supported. Optional: True/False. Default: True.
|
||||
Currently only image data is supported.
|
||||
If there are multiple columns of data in the dataset, the first column should be image data.
|
||||
Optional: True/False. Default: True.
|
||||
- collect_dataset_graph (bool): Whether to collect dataset graph for the training phase.
|
||||
Optional: True/False. Default: True.
|
||||
- histogram_regular (Union[str, None]): Collect weight and bias for parameter distribution page
|
||||
|
@ -122,7 +124,7 @@ class SummaryCollector(Callback):
|
|||
Default: None, which means to follow the behavior as described above. For example, given `collect_freq=10`,
|
||||
when the total steps is 600, TensorSummary will be collected 20 steps, while other summary data 61 steps,
|
||||
but when the total steps is 20, both TensorSummary and other summary will be collected 3 steps.
|
||||
Also note that when in parallel mode, the total steps will be splitted evenly, which will
|
||||
Also note that when in parallel mode, the total steps will be split evenly, which will
|
||||
affect the number of steps TensorSummary will be collected.
|
||||
max_file_size (Optional[int]): The maximum size in bytes of each file that can be written to the disk.
|
||||
Default: None, which means no limit. For example, to write not larger than 4GB,
|
||||
|
@ -479,17 +481,21 @@ class SummaryCollector(Callback):
|
|||
if not self._collect_specified_data.get('collect_input_data'):
|
||||
return
|
||||
|
||||
if self._dataset_sink_mode and context.get_context('device_target') == 'Ascend':
|
||||
input_data = getattr(cb_params, 'train_dataset_element', None)
|
||||
if not isinstance(input_data, (Tensor, list, tuple)):
|
||||
self._collect_specified_data['collect_input_data'] = False
|
||||
logger.warning('On Ascend device, SummaryCollector is not supported to record input data '
|
||||
'in dataset sink mode.')
|
||||
logger.warning("The type of input data is not Tensor/list/tuple, "
|
||||
"so SummaryCollector will not collect input data.")
|
||||
return
|
||||
|
||||
input_data = getattr(cb_params, 'train_dataset_element', None)
|
||||
if input_data is None:
|
||||
if not isinstance(input_data, Tensor) and not input_data:
|
||||
self._collect_specified_data['collect_input_data'] = False
|
||||
logger.info("The 'train_dataset_element' in cb_params is None, "
|
||||
"so 'SummaryCollector' will not record the input data.")
|
||||
logger.warning("The 'train_dataset_element' in cb_params is empty, "
|
||||
"so SummaryCollector will not record the input data.")
|
||||
|
||||
if self._dataset_sink_mode and context.get_context('device_target') == 'Ascend':
|
||||
logger.warning('On Ascend device, SummaryCollector is not supported to record input data '
|
||||
'in dataset sink mode.')
|
||||
return
|
||||
|
||||
if isinstance(input_data, (list, tuple)) and input_data:
|
||||
|
@ -522,6 +528,8 @@ class SummaryCollector(Callback):
|
|||
network = cb_params.train_network if cb_params.mode == ModeEnum.TRAIN.value else cb_params.eval_network
|
||||
graph_proto = network.get_func_graph_proto()
|
||||
if graph_proto is None:
|
||||
logger.warning("Can not get graph proto, it may not be 'GRAPH_MODE' in context currently, "
|
||||
"so SummaryCollector will not collect graph.")
|
||||
return
|
||||
|
||||
self._record.add_value(PluginEnum.GRAPH.value, 'train_network/auto', graph_proto)
|
||||
|
@ -538,7 +546,7 @@ class SummaryCollector(Callback):
|
|||
try:
|
||||
self._record.add_value(PluginEnum.SCALAR.value, 'loss/auto', loss)
|
||||
except ValueError:
|
||||
logger.warning("The output of network is not a scalar, so will not collect loss in SummaryCollector.")
|
||||
logger.warning("The output of network is not a scalar, so SummaryCollector will not collect loss.")
|
||||
self._collect_specified_data['collect_metric'] = False
|
||||
|
||||
def _get_loss(self, cb_params):
|
||||
|
@ -557,7 +565,7 @@ class SummaryCollector(Callback):
|
|||
|
||||
output = cb_params.net_outputs
|
||||
if output is None:
|
||||
logger.warning("Can not find any output by this network, so will not collect loss in SummaryCollector.")
|
||||
logger.warning("Can not find any output by this network, so SummaryCollector will not collect loss.")
|
||||
self._is_parse_loss_success = False
|
||||
return None
|
||||
|
||||
|
|
|
@ -422,7 +422,7 @@ def _make_canvas_for_imgs(tensor, col_imgs=8):
|
|||
col_imgs (Number): The number of image columns. Default: 8.
|
||||
|
||||
Returns:
|
||||
Tensor, retrun canvas of image.
|
||||
Tensor, return canvas of image.
|
||||
"""
|
||||
# expand the N1HW to N3HW
|
||||
if tensor.shape[1] == 1:
|
||||
|
@ -435,7 +435,7 @@ def _make_canvas_for_imgs(tensor, col_imgs=8):
|
|||
cols = min(n, col_imgs)
|
||||
rows = int(np.ceil(float(n) / cols))
|
||||
|
||||
# creat the canvas: expand the n
|
||||
# create the canvas: expand the n
|
||||
out_canvas = np.zeros((3, h * rows, w * cols))
|
||||
i = 0
|
||||
for y in range(rows):
|
||||
|
|
|
@ -168,7 +168,7 @@ class TestSummary:
|
|||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_summarycollector_user_defind(self):
|
||||
"""Test SummaryCollector with user defind."""
|
||||
"""Test SummaryCollector with user-defined."""
|
||||
summary_dir = self._run_network(dataset_sink_mode=True, num_samples=2,
|
||||
custom_lineage_data={'test': 'self test'},
|
||||
export_options={'tensor_format': 'npy'})
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -93,33 +93,6 @@ def test_InsertGradientOf_2():
|
|||
print("clip_gradient:", fd(1.1, 0.1))
|
||||
|
||||
|
||||
summary = P.ScalarSummary()
|
||||
|
||||
|
||||
def debug_gradient(dx):
|
||||
""" debug_gradient """
|
||||
summary("dx: ", dx)
|
||||
return dx
|
||||
|
||||
|
||||
debug = P.InsertGradientOf(debug_gradient)
|
||||
|
||||
|
||||
def test_InsertGradientOf_3():
|
||||
""" test_InsertGradientOf_3 """
|
||||
|
||||
def debug_test(x, y):
|
||||
x = debug(x)
|
||||
y = debug(y)
|
||||
c = x * y
|
||||
return c
|
||||
|
||||
def f(x, y):
|
||||
return grad_all(debug_test)(x, y)
|
||||
|
||||
print("debug_gradient:", f(Tensor(1.0), Tensor(2.0)))
|
||||
|
||||
|
||||
def test_print_shape_type():
|
||||
class Mul(nn.Cell):
|
||||
def __init__(self):
|
||||
|
|
|
@ -55,7 +55,7 @@ _SPECIFIED_DATA['collect_metric'] = False
|
|||
|
||||
|
||||
class CustomNet(Cell):
|
||||
"""Define custom netwrok."""
|
||||
"""Define custom network."""
|
||||
def __init__(self):
|
||||
super(CustomNet, self).__init__()
|
||||
self.add = TensorAdd
|
||||
|
|
Loading…
Reference in New Issue