!715 [MS] will not support auto saving of integrated checkpoint files in the manual parallel scene

Merge pull request !715 from WeibiaoYu/master
mindspore-ci-bot 2020-04-27 14:15:51 +08:00 committed by Gitee
commit 4779a87761
2 changed files with 2 additions and 39 deletions


@@ -150,8 +150,8 @@ class CheckpointConfig:
     keep_checkpoint_max (int): Maximum step to save checkpoint. Default: 5.
     keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0.
         Can't be used with keep_checkpoint_max at the same time.
-    integrated_save (bool): Whether to intergrated save in automatic model parall scene. Default: True.
-        Integrated save function is only supported in automatic parall scene, not supported in manual parallel.
+    integrated_save (bool): Whether to perform integrated save in the automatic model parallel scene. Default: True.
+        Integrated save is only supported in the automatic parallel scene, not in manual parallel.
 Raises:
     ValueError: If the input_param is None or 0.
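For context, a minimal usage sketch (not part of this commit, assuming the MindSpore 0.2-era callback API; the prefix, directory, and step count are placeholders): in a manually configured parallel job, integrated save should be turned off so each device keeps saving its own parameter slices.

from mindspore.train.callback import ModelCheckpoint, CheckpointConfig

# Manual parallel: disable integrated (merged) saving; only automatic
# parallel supports merging sliced parameters at save time.
config = CheckpointConfig(save_checkpoint_steps=100,  # placeholder cadence
                          keep_checkpoint_max=5,
                          integrated_save=False)
ckpt_cb = ModelCheckpoint(prefix="net", directory="./ckpt", config=config)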

@@ -230,15 +230,6 @@ def load_param_into_net(net, parameter_dict):
         raise TypeError(msg)
     logger.info("Execute load parameter into net process.")
-    for name in parameter_dict:
-        for _, param in net.parameters_and_names():
-            if name == param.name and param.layerwise_parallel:
-                # layerwise parallel parameter data loaded from checkpoint file,
-                # was a complete(merged) data, need to be splited
-                new_param = parameter_dict[param.name]
-                _load_tensor_for_layerwise(new_param, param)
-                break
     param_not_load = []
     for _, param in net.parameters_and_names():
         if param.name in parameter_dict:
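With the layerwise branch removed, loading a checkpoint becomes a plain name-for-name copy into the net. A minimal sketch of the remaining calling pattern (the checkpoint file name is a placeholder and `net` is assumed to be a constructed Cell):

from mindspore.train.serialization import load_checkpoint, load_param_into_net

param_dict = load_checkpoint("net-1_100.ckpt")  # placeholder file name
load_param_into_net(net, param_dict)  # copies by parameter name; no re-slicing step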
@@ -368,34 +359,6 @@ def _get_merged_param_data(net, param_name, param_data):
     return param_data
-
-def _load_tensor_for_layerwise(new_param, old_param):
-    """
-    Replaces parameters with sliced tensors by layerwise parallel strategies.
-
-    Args:
-        new_param (Parameter): The new layerwise parallel parameter, will be loaded into net.
-        old_param (Parameter): The current parameter in the net.
-    """
-    if not isinstance(new_param.data, Tensor) or not isinstance(old_param.data, Tensor):
-        logger.error("Failed to combine the net and the parameters.")
-        msg = ("layerwise parallel parameter should be a Tensor, but got {}.".format(type(new_param.data)))
-        raise TypeError(msg)
-    if old_param.data.shape() == new_param.data.shape():
-        return
-    from mindspore.parallel._tensor import _load_tensor
-    from mindspore.communication.management import get_group_size
-    dev_mat = [get_group_size()]
-    tensor_map = []
-    shape = new_param.data.shape()
-    for x in range(len(shape)):  # dim 0 set 0, others set -1
-        if x:
-            tensor_map.append(-1)
-        else:
-            tensor_map.append(0)
-    new_tensor = _load_tensor(new_param.data, dev_mat, tensor_map)
-    new_param.set_parameter_data(new_tensor)
-
 def _fill_param_into_net(net, parameter_list):
     """
     Fills parameter_list into net.
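For reference, the layout the removed helper built, shown with hypothetical numbers: given a group size of 8 and a merged parameter of shape (64, 128), it declared a single device axis and split only dim 0 across it.

# Hypothetical illustration of the removed helper's slicing layout.
dev_mat = [8]         # single device axis: get_group_size() == 8 (assumed)
tensor_map = [0, -1]  # dim 0 -> device axis 0 (split); dim 1 -> -1 (replicated)
# _load_tensor(new_param.data, dev_mat, tensor_map) then yielded this rank's
# (64 // 8, 128) = (8, 128) slice of the merged (64, 128) tensor.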