From aacc85caecc53db7d4ff61ea76545f958ff47b96 Mon Sep 17 00:00:00 2001
From: WeibiaoYu
Date: Sun, 26 Apr 2020 22:01:27 -0400
Subject: [PATCH] auto saving of integrated checkpoint files is not supported
 in manual parallel mode

---
 mindspore/train/callback.py      |  4 ++--
 mindspore/train/serialization.py | 37 --------------------------------
 2 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py
index b9635acc62e..d14a1fab285 100644
--- a/mindspore/train/callback.py
+++ b/mindspore/train/callback.py
@@ -150,8 +150,8 @@ class CheckpointConfig:
         keep_checkpoint_max (int): Maximum step to save checkpoint. Default: 5.
         keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0.
             Can't be used with keep_checkpoint_max at the same time.
-        integrated_save (bool): Whether to intergrated save in automatic model parall scene. Default: True.
-            Integrated save function is only supported in automatic parall scene, not supported in manual parallel.
+        integrated_save (bool): Whether to perform integrated save in automatic model parallel scene. Default: True.
+            Integrated save is only supported in automatic parallel scene, not in manual parallel mode.
 
     Raises:
         ValueError: If the input_param is None or 0.
diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py
index ae17bf81165..74aa2c2253a 100644
--- a/mindspore/train/serialization.py
+++ b/mindspore/train/serialization.py
@@ -225,15 +225,6 @@ def load_param_into_net(net, parameter_dict):
         raise TypeError(msg)
 
     logger.info("Execute load parameter into net process.")
-    for name in parameter_dict:
-        for _, param in net.parameters_and_names():
-            if name == param.name and param.layerwise_parallel:
-                # layerwise parallel parameter data loaded from checkpoint file,
-                # was a complete(merged) data, need to be splited
-                new_param = parameter_dict[param.name]
-                _load_tensor_for_layerwise(new_param, param)
-                break
-
     param_not_load = []
     for _, param in net.parameters_and_names():
         if param.name in parameter_dict:
@@ -363,34 +354,6 @@ def _get_merged_param_data(net, param_name, param_data):
     return param_data
 
 
-def _load_tensor_for_layerwise(new_param, old_param):
-    """
-    Replaces parameters with sliced tensors by layerwise parallel strategies.
-
-    Args:
-        new_param (Parameter): The new layerwise parallel parameter, will be loaded into net.
-        old_param(Parameter): The current parameter in the net.
-    """
-    if not isinstance(new_param.data, Tensor) or not isinstance(old_param.data, Tensor):
-        logger.error("Failed to combine the net and the parameters.")
-        msg = ("layerwise parallel parameter should be a Tensor, but got {}.".format(type(new_param.data)))
-        raise TypeError(msg)
-
-    if old_param.data.shape() == new_param.data.shape():
-        return
-
-    from mindspore.parallel._tensor import _load_tensor
-    from mindspore.communication.management import get_group_size
-    dev_mat = [get_group_size()]
-    shape = new_param.data.shape()
-    for x in range(len(shape)):  # dim 0 set 0, others set -1
-        if x:
-            tensor_map.append(-1)
-
-    new_tensor = _load_tensor(new_param.data, dev_mat, tensor_map)
-    new_param.set_parameter_data(new_tensor)
-
-
 def _fill_param_into_net(net, parameter_list):
     """
     Fills parameter_list into net.
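
For context on the callback.py hunk above, a minimal sketch of how the documented integrated_save flag is typically passed through CheckpointConfig into a checkpoint callback; the network, dataset, prefix, and directory names below are hypothetical placeholders, not part of this patch:

# Sketch: pass integrated_save through CheckpointConfig to ModelCheckpoint.
# "model", "dataset", the prefix, and the directory are assumed placeholders.
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint

config = CheckpointConfig(save_checkpoint_steps=100,
                          keep_checkpoint_max=5,
                          integrated_save=True)  # honored only under automatic parallel
ckpt_cb = ModelCheckpoint(prefix="net", directory="./ckpt", config=config)
# model.train(epoch=1, train_dataset=dataset, callbacks=[ckpt_cb])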
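
Likewise, the serialization.py hunks strip the layerwise-parallel re-splitting out of the checkpoint load path; a sketch of the load flow that remains, assuming an already constructed network `net` and a hypothetical checkpoint file name:

# Sketch of the remaining load path: parameters are copied from the checkpoint
# dict into the net by name, with no layerwise-parallel split applied.
# "net" and the checkpoint file name are assumed placeholders.
from mindspore.train.serialization import load_checkpoint, load_param_into_net

param_dict = load_checkpoint("net-1_100.ckpt")  # name -> Parameter mapping
load_param_into_net(net, param_dict)            # loads matching names into net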