Add more logging when collecting graphs and using summary operators

Fix failure to collect input data when batch size is 1 and total step
number is 1

Fixed spelling errors
This commit is contained in:
ougongchang 2021-01-04 11:36:07 +08:00
parent 7a7b455a57
commit c6e4b0c85f
6 changed files with 35 additions and 44 deletions

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,14 +15,24 @@
"""debug_ops"""
from types import FunctionType, MethodType
from mindspore import context
from ..._checkparam import Validator as validator
from ..._checkparam import Rel
from ...common import dtype as mstype
from ..primitive import prim_attr_register, PrimitiveWithInfer
def _check_mode(class_name):
    """Reject PyNative execution mode.

    Summary operators only work under graph mode, so raise immediately
    when the current context reports PyNative mode.
    """
    if context.get_context('mode') == context.PYNATIVE_MODE:
        raise RuntimeError(f'{class_name} operator does not support PyNative mode.')
def _check_summary_param(name, value, class_name):
"""Checks the name and value is valid for summary."""
_check_mode(class_name)
n_type = name['dtype']
n_value = name['value']
validator.check_value_type('name', n_type, [type(mstype.string)], class_name)

View File

@ -99,7 +99,9 @@ class SummaryCollector(Callback):
- collect_eval_lineage (bool): Whether to collect lineage data for the evaluation phase,
this field will be displayed on the lineage page of MindInsight. Optional: True/False. Default: True.
- collect_input_data (bool): Whether to collect dataset for each training.
Currently only image data is supported. Optional: True/False. Default: True.
Currently only image data is supported.
If there are multiple columns of data in the dataset, the first column should be image data.
Optional: True/False. Default: True.
- collect_dataset_graph (bool): Whether to collect dataset graph for the training phase.
Optional: True/False. Default: True.
- histogram_regular (Union[str, None]): Collect weight and bias for parameter distribution page
@ -122,7 +124,7 @@ class SummaryCollector(Callback):
Default: None, which means to follow the behavior as described above. For example, given `collect_freq=10`,
when the total steps is 600, TensorSummary will be collected 20 steps, while other summary data 61 steps,
but when the total steps is 20, both TensorSummary and other summary will be collected 3 steps.
Also note that when in parallel mode, the total steps will be splitted evenly, which will
Also note that when in parallel mode, the total steps will be split evenly, which will
affect the number of steps TensorSummary will be collected.
max_file_size (Optional[int]): The maximum size in bytes of each file that can be written to the disk.
Default: None, which means no limit. For example, to write not larger than 4GB,
@ -479,17 +481,21 @@ class SummaryCollector(Callback):
if not self._collect_specified_data.get('collect_input_data'):
return
if self._dataset_sink_mode and context.get_context('device_target') == 'Ascend':
input_data = getattr(cb_params, 'train_dataset_element', None)
if not isinstance(input_data, (Tensor, list, tuple)):
self._collect_specified_data['collect_input_data'] = False
logger.warning('On Ascend device, SummaryCollector is not supported to record input data '
'in dataset sink mode.')
logger.warning("The type of input data is not Tensor/list/tuple, "
"so SummaryCollector will not collect input data.")
return
input_data = getattr(cb_params, 'train_dataset_element', None)
if input_data is None:
if not isinstance(input_data, Tensor) and not input_data:
self._collect_specified_data['collect_input_data'] = False
logger.info("The 'train_dataset_element' in cb_params is None, "
"so 'SummaryCollector' will not record the input data.")
logger.warning("The 'train_dataset_element' in cb_params is empty, "
"so SummaryCollector will not record the input data.")
if self._dataset_sink_mode and context.get_context('device_target') == 'Ascend':
logger.warning('On Ascend device, SummaryCollector is not supported to record input data '
'in dataset sink mode.')
return
if isinstance(input_data, (list, tuple)) and input_data:
@ -522,6 +528,8 @@ class SummaryCollector(Callback):
network = cb_params.train_network if cb_params.mode == ModeEnum.TRAIN.value else cb_params.eval_network
graph_proto = network.get_func_graph_proto()
if graph_proto is None:
logger.warning("Can not get graph proto, it may not be 'GRAPH_MODE' in context currently, "
"so SummaryCollector will not collect graph.")
return
self._record.add_value(PluginEnum.GRAPH.value, 'train_network/auto', graph_proto)
@ -538,7 +546,7 @@ class SummaryCollector(Callback):
try:
self._record.add_value(PluginEnum.SCALAR.value, 'loss/auto', loss)
except ValueError:
logger.warning("The output of network is not a scalar, so will not collect loss in SummaryCollector.")
logger.warning("The output of network is not a scalar, so SummaryCollector will not collect loss.")
self._collect_specified_data['collect_metric'] = False
def _get_loss(self, cb_params):
@ -557,7 +565,7 @@ class SummaryCollector(Callback):
output = cb_params.net_outputs
if output is None:
logger.warning("Can not find any output by this network, so will not collect loss in SummaryCollector.")
logger.warning("Can not find any output by this network, so SummaryCollector will not collect loss.")
self._is_parse_loss_success = False
return None

View File

@ -422,7 +422,7 @@ def _make_canvas_for_imgs(tensor, col_imgs=8):
col_imgs (Number): The image column number. Default: 8.
Returns:
Tensor, retrun canvas of image.
Tensor, return canvas of image.
"""
# expand the N1HW to N3HW
if tensor.shape[1] == 1:
@ -435,7 +435,7 @@ def _make_canvas_for_imgs(tensor, col_imgs=8):
cols = min(n, col_imgs)
rows = int(np.ceil(float(n) / cols))
# creat the canvas: expand the n
# create the canvas: expand the n
out_canvas = np.zeros((3, h * rows, w * cols))
i = 0
for y in range(rows):

View File

@ -168,7 +168,7 @@ class TestSummary:
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_onecard
def test_summarycollector_user_defind(self):
"""Test SummaryCollector with user defind."""
"""Test SummaryCollector with user-defined."""
summary_dir = self._run_network(dataset_sink_mode=True, num_samples=2,
custom_lineage_data={'test': 'self test'},
export_options={'tensor_format': 'npy'})

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -93,33 +93,6 @@ def test_InsertGradientOf_2():
print("clip_gradient:", fd(1.1, 0.1))
summary = P.ScalarSummary()
def debug_gradient(dx):
    """Emit the incoming gradient via the scalar summary, then pass it through unchanged."""
    summary("dx: ", dx)
    return dx
debug = P.InsertGradientOf(debug_gradient)
def test_InsertGradientOf_3():
    """Exercise gradient debugging hooks when differentiating a product of two tensors."""
    def instrumented_mul(a, b):
        # Route both operands through the debug hook before multiplying.
        a = debug(a)
        b = debug(b)
        return a * b

    def take_grads(a, b):
        return grad_all(instrumented_mul)(a, b)

    print("debug_gradient:", take_grads(Tensor(1.0), Tensor(2.0)))
def test_print_shape_type():
class Mul(nn.Cell):
def __init__(self):

View File

@ -55,7 +55,7 @@ _SPECIFIED_DATA['collect_metric'] = False
class CustomNet(Cell):
"""Define custom netwrok."""
"""Define custom network."""
def __init__(self):
super(CustomNet, self).__init__()
self.add = TensorAdd