!27245 modify parallel api note for master

Merge pull request !27245 from lilei/modify_parallel_API_note_for_master
i-robot 2021-12-06 11:04:24 +00:00 committed by Gitee
commit d8e5200e39
11 changed files with 160 additions and 133 deletions

@ -60,7 +60,7 @@ bool InitDevice(int64_t device_num, int64_t global_rank, const std::string &back
return false;
}
if ((backend != HCCL_BACKEND) && (backend != NCCL_BACKEND) && (backend != UNDEFINED_BACKEND)) {
MS_LOG(ERROR) << "The context configuration parameter 'backend' must be hccl, nccl "
MS_LOG(ERROR) << "For 'InitDevice', the argument 'backend' must be hccl, nccl "
"or undefined_backend, but got invalid backend: "
<< backend;
return false;
@ -156,7 +156,7 @@ std::shared_ptr<Device> GetListMemberByIndex(size_t index, const std::vector<std
Status DeviceManager::Init(const RankList &devices, int64_t global_device_rank, const RankList &stage_map,
const std::string &backend) {
if ((backend != HCCL_BACKEND) && (backend != NCCL_BACKEND) && (backend != UNDEFINED_BACKEND)) {
MS_LOG(ERROR) << "The context configuration parameter 'backend' must be hccl, nccl "
MS_LOG(ERROR) << "For 'Init', the argument 'backend' must be hccl, nccl "
"or undefined_backend, but got invalid backend: "
<< backend;
return FAILED;

@ -96,12 +96,12 @@ bool PipelineSplit(const ResourcePtr &res) {
device_num = parallel::ParallelContext::GetInstance()->device_num();
}
if (device_num < 1) {
MS_LOG(ERROR) << "The context configuration parameter 'device_num' must be positive, "
MS_LOG(ERROR) << "For 'PipelineSplit', the argument 'device_num' must be positive, "
"but got the value of device_num: "
<< device_num;
}
if (global_rank < 0) {
MS_LOG(ERROR) << "The context configuration parameter 'global_rank' must be nonnegative, "
MS_LOG(ERROR) << "For 'PipelineSplit', the argument 'global_rank' must be nonnegative, "
"but got the value of global_rank: "
<< global_rank;
}

@ -82,11 +82,11 @@ class Backend:
def __new__(cls, name):
"""Create instance object of Backend."""
if not isinstance(name, str):
raise TypeError("The context configuration parameter 'name' must be a string, "
raise TypeError("For 'Backend', the class variable 'name' must be a string, "
"but got the type : {}".format(type(name)))
value = getattr(Backend, name.upper(), Backend.UNDEFINED)
if value == Backend.UNDEFINED:
raise ValueError("The context configuration parameter 'name' {} is not supported, "
raise ValueError("For 'Backend', the class variable 'name' {} is not supported, "
"please use hccl or nccl.".format(name))
return value
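
The Backend lookup above is a small pattern: validate the type, resolve the upper-cased name against the class attributes, and treat an UNDEFINED fallback as "unsupported". A minimal standalone sketch of that pattern (the _BackendSketch name and its values are illustrative, not MindSpore's actual class):

class _BackendSketch:
    HCCL = "hccl"
    NCCL = "nccl"
    UNDEFINED = "undefined"

    def __new__(cls, name):
        # Reject non-string names first, then fall back to UNDEFINED for unknown ones.
        if not isinstance(name, str):
            raise TypeError("For 'Backend', the class variable 'name' must be a string, "
                            "but got the type : {}".format(type(name)))
        value = getattr(cls, name.upper(), cls.UNDEFINED)
        if value == cls.UNDEFINED:
            raise ValueError("For 'Backend', the class variable 'name' {} is not supported, "
                             "please use hccl or nccl.".format(name))
        return value

print(_BackendSketch("HcCl"))   # -> "hccl"; _BackendSketch(123) or _BackendSketch("gloo") raise as above
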
@ -212,7 +212,7 @@ def _get_rank_helper(group, backend):
elif backend == Backend.NCCL:
rank_id = mpi.get_rank_id(group)
else:
raise ValueError("The context configuration parameter 'backend' {} is not supported, "
raise ValueError("For '_get_rank_helper', the argument 'backend' {} is not supported, "
"please use hccl_mpi, hccl or nccl.".format(backend))
return rank_id
@ -243,7 +243,7 @@ def _get_local_rank_helper(group, backend):
elif backend == Backend.NCCL:
raise RuntimeError("Nccl doesn't support get_local_rank_id now.")
else:
raise ValueError("The context configuration parameter 'backend' {} is not supported, "
raise ValueError("For '_get_local_rank_helper', the argument 'backend' {} is not supported, "
"please use hccl_mpi or hccl.".format(backend))
return rank_id
@ -277,7 +277,7 @@ def _get_size_helper(group, backend):
elif backend == Backend.NCCL:
size = mpi.get_rank_size(group)
else:
raise ValueError("The context configuration parameter 'backend' {} is not supported, "
raise ValueError("For '_get_size_helper', the argument 'backend' {} is not supported, "
"please use hccl or nccl.".format(backend))
return size
@ -306,7 +306,7 @@ def _get_local_size_helper(group, backend):
elif backend == Backend.NCCL:
raise RuntimeError("Nccl doesn't support get_local_rank_size now.")
else:
raise ValueError("The context configuration parameter 'backend' {} is not supported, "
raise ValueError("For '_get_local_size_helper', the argument 'backend' {} is not supported, "
"please use hccl.".format(backend))
return size
@ -330,16 +330,17 @@ def _get_world_rank_from_group_rank_helper(group, group_rank_id, backend):
"""
world_rank_id = None
if not isinstance(group_rank_id, int):
raise TypeError("The parameter 'group_rank_id' must be int, but got type {}".format(type(group_rank_id)))
raise TypeError("For 'get_world_rank_from_group_rank', the argument 'group_rank_id' must be"
" type of int, but got 'group_rank_id' type : {}.".format(type(group_rank_id)))
if backend == Backend.HCCL:
if group == HCCL_WORLD_COMM_GROUP:
raise ValueError("The parameter 'group' cannot be 'hccl_world_group'. ")
raise ValueError("For 'get_world_rank_from_group_rank' on GPU, the argument 'group' "
"should be 'NCCL_WORLD_COMM_GROUP', but got 'HCCL_WORLD_COMM_GROUP'.")
world_rank_id = hccl.get_world_rank_from_group_rank(group, group_rank_id)
elif backend == Backend.NCCL:
raise RuntimeError("Nccl doesn't support get_world_rank_from_group_rank now.")
else:
raise ValueError("The context configuration parameter 'backend' {} is not supported, "
"please use hccl.".format(backend))
raise ValueError("The argument 'backend' {} is not supported, please use hccl.".format(backend))
return world_rank_id
@ -362,16 +363,17 @@ def _get_group_rank_from_world_rank_helper(world_rank_id, group, backend):
"""
group_rank_id = None
if not isinstance(world_rank_id, int):
raise TypeError("The parameter 'world_rank_id' should be int, but got type {}".format(type(world_rank_id)))
raise TypeError("For 'get_group_rank_from_world_rank', the argument 'world_rank_id' must be type of int, "
"but got 'world_rank_id' type : {}.".format(type(world_rank_id)))
if backend == Backend.HCCL:
if group == HCCL_WORLD_COMM_GROUP:
raise ValueError("The parameter group cannot be 'hccl_world_group'. ")
raise ValueError("For 'get_group_rank_from_world_rank' on GPU, the argument 'group' "
"should be 'NCCL_WORLD_COMM_GROUP', but got 'HCCL_WORLD_COMM_GROUP'.")
group_rank_id = hccl.get_group_rank_from_world_rank(world_rank_id, group)
elif backend == Backend.NCCL:
raise RuntimeError("Nccl doesn't support get_group_rank_from_world_rank now.")
else:
raise ValueError("The context configuration parameter 'backend' {} is not supported, "
"please use hccl.".format(backend))
raise ValueError("The argument 'backend' {} is not supported, please use hccl.".format(backend))
return group_rank_id
@ -398,12 +400,12 @@ def _create_group_helper(group, rank_ids, backend):
return
if backend == Backend.HCCL:
if not isinstance(rank_ids, list):
raise TypeError("The type of parameter 'rank_ids' should be list, but got the type : {}."
.format(type(rank_ids)))
raise TypeError("For 'create_group', the argument 'rank_ids' must be type of list, "
"but got 'rank_ids' type : {}.".format(type(rank_ids)))
rank_size = len(rank_ids)
if rank_size < 1:
raise ValueError("The parameter 'rank_ids' size should be large than 0, "
"but got the value : {}.".format(rank_size))
raise ValueError("For 'create_group', the argument 'rank_ids' size should be greater than 1, "
"but got 'rank_ids' size : {}.".format(len(rank_ids)))
if len(rank_ids) - len(list(set(rank_ids))) > 0:
raise ValueError("List rank_ids in Group {} has duplicate data!".format(group))
hccl.create_group(group, rank_size, rank_ids)
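
Taken together, the rank_ids checks in _create_group_helper above reduce to three guards: the value must be a list, it must be non-empty, and it must not contain duplicates. A quick standalone illustration of those guards (the _check_rank_ids helper is hypothetical, and nothing here touches hccl itself):

def _check_rank_ids(group, rank_ids):
    # Mirrors the guards shown above: type, size, then duplicates.
    if not isinstance(rank_ids, list):
        raise TypeError("For 'create_group', the argument 'rank_ids' must be type of list, "
                        "but got 'rank_ids' type : {}.".format(type(rank_ids)))
    if len(rank_ids) < 1:
        raise ValueError("For 'create_group', the argument 'rank_ids' size should be greater than 0, "
                         "but got 'rank_ids' size : {}.".format(len(rank_ids)))
    if len(rank_ids) != len(set(rank_ids)):
        raise ValueError("List rank_ids in Group {} has duplicate data!".format(group))

_check_rank_ids("group_4p", [0, 1, 2, 3])   # passes silently
# _check_rank_ids("group_4p", [0, 0, 1])    # raises ValueError (duplicate ranks)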

@ -35,10 +35,11 @@ def check_group(group):
if isinstance(group, (str)):
group_len = len(group)
if group_len > MAX_GROUP_NAME_LEN or group_len == 0:
raise ValueError("The length of parameter 'group' should in range [1, {}], but got the value : {}"
.format(MAX_GROUP_NAME_LEN, group_len))
raise ValueError("The length of communication group name must be in range [1, 127), "
"but got the value : {} ".format(group_len))
else:
raise TypeError("The context configuration parameter 'group' must be a string, but got {}".format(type(group)))
raise TypeError("The type of communication group name must be type of string, "
"but got 'group' type : {}.".format(type(group)))
def check_rank_num(rank_num):
@ -50,10 +51,11 @@ def check_rank_num(rank_num):
"""
if isinstance(rank_num, (int)):
if rank_num > MAX_RANK_NUM or rank_num <= 0:
raise ValueError("The parameter 'rank_num' should in range [1, {}], but got the value : {}"
.format(MAX_RANK_NUM, rank_num))
raise ValueError("For 'create_group', the argument 'rank_num' should be greater than 0 and less than {}, "
"but got 'rank_num' value : {}.".format(MAX_RANK_NUM, rank_num))
else:
raise TypeError("The parameter 'rank_num' must be a python int, but got {}".format(type(rank_num)))
raise TypeError("For 'create_group', the argument 'rank_num' must be type of int, "
"but got 'rank_num' type : {}.".format(type(rank_num)))
def check_rank_id(rank_id):
@ -65,10 +67,11 @@ def check_rank_id(rank_id):
"""
if isinstance(rank_id, (int)):
if rank_id >= MAX_RANK_NUM or rank_id < 0:
raise ValueError("The parameter 'rank_id' should in range [1, {}], but got the value : {}"
.format(MAX_RANK_NUM, rank_id))
raise ValueError("The rand id in the communication group must be greater or equal 0 and "
"less than {}, but got type value : {}.".format(MAX_RANK_NUM, rank_id))
else:
raise TypeError("The parameter 'rank_id' must be a python int, but got {}".format(type(rank_id)))
raise TypeError("The rand id in the communication group must be must be type of int, "
"but got type value : {}.".format(type(rank_id)))
def load_lib():
@ -113,11 +116,13 @@ def create_group(group, rank_num, rank_ids):
check_rank_num(rank_num)
if isinstance(rank_ids, (list)):
if rank_num != len(rank_ids):
raise ValueError("The parameter 'rank_num' number is not equal to the length of rank_ids, "
"but got 'rank_num' : {} and 'rank_ids' : {}.".format(rank_num, rank_ids))
raise ValueError("For 'create_group', the argument 'rank_num' number should be equal to the length "
"of rank_ids, but got 'rank_num' value : {} and 'rank_ids' value : {}."
.format(rank_num, rank_ids))
for rank_id in rank_ids:
if not isinstance(rank_id, (int)) or rank_id < 0:
raise ValueError("The parameter 'rank_id' must be unsigned integer, but got {}".format(type(rank_id)))
raise ValueError("For 'create_group', the elements of argument 'rank_ids' must be "
"unsigned integer, but got the type : {}".format(type(rank_id)))
c_array_rank_ids = c_array(ctypes.c_uint, rank_ids)
c_rank_num = ctypes.c_uint(rank_num)
c_group = c_str(group)
@ -125,7 +130,8 @@ def create_group(group, rank_num, rank_ids):
if ret != 0:
raise RuntimeError('Create group error, the error code is ' + str(ret))
else:
raise TypeError("The parameter 'rank_id' must be a python list, but got {}".format(type(rank_ids)))
raise TypeError("For 'create_group', the argument 'rank_ids' must be type of list, "
"but got 'rank_ids' type : {}.".format(type(rank_ids)))
def destroy_group(group):
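
For context, the c_str and c_array helpers used by create_group above wrap ordinary ctypes conversions before the group is handed to the HCCL library. A self-contained approximation of that conversion step (the helper bodies here are the usual ctypes idiom and may differ in detail from MindSpore's implementations):

import ctypes

def c_str(string):
    # Encode a Python str as a NUL-terminated C string.
    return ctypes.c_char_p(string.encode("utf-8"))

def c_array(ctype, values):
    # Build a fixed-size C array (e.g. unsigned int[]) from a Python sequence.
    return (ctype * len(values))(*values)

rank_ids = [0, 1, 2, 3]
c_array_rank_ids = c_array(ctypes.c_uint, rank_ids)
c_rank_num = ctypes.c_uint(len(rank_ids))
c_group = c_str("group_4p")
print(list(c_array_rank_ids), c_rank_num.value, c_group.value)   # [0, 1, 2, 3] 4 b'group_4p'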

@ -72,7 +72,7 @@ def _check_parallel_envs():
try:
int(rank_id_str)
except ValueError:
print("The parameter 'RANK_ID' should be number, but got {}".format(type(rank_id_str)))
print("Environment variables 'RANK_ID' should be number, but got the type : {}".format(type(rank_id_str)))
finally:
pass
rank_table_file_str = os.getenv("MINDSPORE_HCCL_CONFIG_PATH")
@ -121,15 +121,15 @@ def init(backend_name=None):
elif device_target == "GPU":
backend_name = "nccl"
else:
raise RuntimeError("The context configuration parameter 'device_target' {} is not supported in "
raise RuntimeError("For 'set_context', the argument 'device_target' {} is not supported in "
"parallel initialization, please use Ascend or GPU.".format(device_target))
if not isinstance(backend_name, str):
raise TypeError("The context configuration parameter 'backend_name' must be a string, "
raise TypeError("For 'init', the argument 'backend_name' must be a string, "
"but got the type : {}".format(type(backend_name)))
if backend_name == "hccl":
if device_target != "Ascend":
raise RuntimeError("The context configuration parameter 'device_target' should be 'Ascend' to init hccl, "
raise RuntimeError("For 'init', the argument 'backend_name' should be 'Ascend' to init hccl, "
"but got {}".format(device_target))
if not mpi_init:
_check_parallel_envs()
@ -145,8 +145,8 @@ def init(backend_name=None):
GlobalComm.WORLD_COMM_GROUP = NCCL_WORLD_COMM_GROUP
GlobalComm.INITED = True
else:
raise RuntimeError("The context configuration parameter 'backend_name' {} is not supported, "
"please use hccl or nccl.".format(backend_name))
raise RuntimeError("For 'init', the argument 'backend_name' must be nccl while 'device_target' is GPU, "
"but got the 'backend_name' : hccl.")
def release():
@ -195,8 +195,8 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
>>> # the result is the rank_id in world_group
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'get_rank', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
return _get_rank_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
@ -231,8 +231,8 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
local_rank is: 1, world_rank is 9
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'get_local_rank', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
return _get_local_rank_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
@ -266,8 +266,8 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
group_size is: 8
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'get_group_size', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
return _get_size_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
@ -301,8 +301,8 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
local_rank_size is: 8
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'get_local_rank_size', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
return _get_local_size_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
@ -342,8 +342,8 @@ def get_world_rank_from_group_rank(group, group_rank_id):
world_rank_id is: 4
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'get_world_rank_from_group_rank', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
return _get_world_rank_from_group_rank_helper(group=group, group_rank_id=group_rank_id, backend=GlobalComm.BACKEND)
@ -384,8 +384,8 @@ def get_group_rank_from_world_rank(world_rank_id, group):
group_rank_id is: 1
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'get_group_rank_from_world_rank', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
return _get_group_rank_from_world_rank_helper(world_rank_id=world_rank_id, group=group, backend=GlobalComm.BACKEND)
@ -422,8 +422,8 @@ def create_group(group, rank_ids):
>>> allreduce = ops.AllReduce(group)
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the value : {}".format(type(group)))
raise TypeError("For 'create_group', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
_create_group_helper(group, rank_ids, backend=GlobalComm.BACKEND)
@ -445,6 +445,6 @@ def destroy_group(group):
RuntimeError: If HCCL is not available or MindSpore is GPU version.
"""
if not isinstance(group, str):
raise TypeError("The context configuration parameter 'group' must be a string, "
"but got the type : {}".format(type(group)))
raise TypeError("For 'destroy_group', the argument 'group' must be type of string, "
"but got 'group' type : {}.".format(type(group)))
_destroy_group_helper(group, backend=GlobalComm.BACKEND)
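
The get_rank / get_group_size / create_group changes above all converge on one message template: name the public API, name the offending argument, state the expected type, and echo the received type. A throwaway helper that shows the template in isolation (the _check_str_arg function is purely illustrative and not part of MindSpore):

def _check_str_arg(api_name, arg_name, value):
    # The message shape this merge request standardizes on.
    if not isinstance(value, str):
        raise TypeError("For '{}', the argument '{}' must be type of string, "
                        "but got '{}' type : {}.".format(api_name, arg_name, arg_name, type(value)))
    return value

_check_str_arg("get_rank", "group", "hccl_world_group")   # ok
# _check_str_arg("get_rank", "group", 0)                  # raises TypeError in the new style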

@ -265,12 +265,12 @@ class Cell(Cell_):
@pipeline_stage.setter
def pipeline_stage(self, value):
if not isinstance(value, int):
raise TypeError(f"For 'Cell', the property 'pipeline_stage' must be int type, "
f"but got type {type(value)}.")
raise TypeError("For 'context.set_auto_parallel_context', the argument 'pipeline_stages' "
"must be int type, but got type : {}".format(type(value)))
if value < 0:
raise TypeError("For 'Cell', the property 'pipeline_stage' can not be less than 0, "
"but got {}".format(value))
raise ValueError("For 'context.set_auto_parallel_context', the argument 'pipeline_stages' "
"can not be less than 0, but got {}".format(value))
self._pipeline_stage = value
for item in self.trainable_params():
item.add_pipeline_stage(value)

@ -203,10 +203,11 @@ class _AutoParallelContext:
def set_pipeline_stages(self, stages):
"""Set the stages of the pipeline"""
if isinstance(stages, bool) or not isinstance(stages, int):
raise TypeError("The type of pipeline_stage_num must be int, but got the type : {}.".format(type(stages)))
raise TypeError("For 'set_auto_parallel_context().set_pipeline_stages', the argument 'pipeline_stages' "
"must be int, but got the type : {}.".format(type(stages)))
if stages < 1:
raise ValueError("The parameter pipeline_stage_num be greater or equal 1, "
"but got the value of stages : {}.".format(stages))
raise ValueError("For 'set_auto_parallel_context().set_pipeline_stages', the argument 'pipeline_stages' "
"should be greater or equal 1, but got the value of stages : {}.".format(stages))
self.check_context_handle()
self._context_handle.set_pipeline_stage_split_num(stages)
@ -265,8 +266,8 @@ class _AutoParallelContext:
loss_repeated_mean (bool): The loss_repeated_mean flag.
"""
if not isinstance(loss_repeated_mean, bool):
raise TypeError("The type of context configuration parameter 'loss_repeated_mean' must be bool, "
"but got the type : {}.".format(type(loss_repeated_mean)))
raise TypeError("For 'auto_parallel_context().set_loss_repeated_mean', the argument 'loss_repeated_mean' "
"must be bool, but got the type : {}.".format(type(loss_repeated_mean)))
self.check_context_handle()
self._context_handle.set_loss_repeated_mean(loss_repeated_mean)
@ -314,7 +315,7 @@ class _AutoParallelContext:
self.check_context_handle()
ret = self._context_handle.set_strategy_search_mode(search_mode)
if ret is False:
raise ValueError("The context configuration parameter 'search_mode' only support "
raise ValueError("The context configuration parameter 'auto_parallel_search_mode' only support "
"'recursive_programming' and 'dynamic_programming', but got the value : {}."
.format(search_mode))
@ -363,8 +364,9 @@ class _AutoParallelContext:
"and this attribute will be deleted in a future MindSpore version.")
self.check_context_handle()
if not isinstance(sharding_propagation, bool):
raise TypeError("The type of parameter 'sharding_propagation' must be bool, "
"but got the type : {}.".format(type(sharding_propagation)))
raise TypeError("For 'set_auto_parallel_context().set_sharding_propagation', "
"the argument 'sharding_propagation' must be bool, but got the type : {}."
.format(type(sharding_propagation)))
self._context_handle.set_sharding_propagation(sharding_propagation)
def get_sharding_propagation(self):
@ -429,21 +431,23 @@ class _AutoParallelContext:
self.check_context_handle()
if isinstance(dataset_strategy, str):
if dataset_strategy not in ("full_batch", "data_parallel"):
raise ValueError("The context configuration parameter 'dataset_strategy' must be "
"'full_batch' or 'data_parallel', but got the value : {}.".format(dataset_strategy))
raise ValueError("For 'set_auto_parallel_context().set_dataset_strategy', the argument "
"'dataset_strategy' must be 'full_batch' or 'data_parallel', but got the value : {}."
.format(dataset_strategy))
self._context_handle.set_full_batch(dataset_strategy == "full_batch")
self._dataset_strategy_using_str = True
return
if not isinstance(dataset_strategy, tuple):
raise TypeError("The type of context configuration parameter 'strategy' must be str or tuple type, "
"but got the type : {}.".format(type(dataset_strategy)))
raise TypeError("For 'set_auto_parallel_context().set_dataset_strategy', the argument 'dataset_strategy' "
"must be str or tuple type, but got the type : {}.".format(type(dataset_strategy)))
for ele in dataset_strategy:
if not isinstance(ele, tuple):
raise TypeError("The element of strategy must be tuple, but got the type : {} .".format(type(ele)))
raise TypeError("For 'set_auto_parallel_context().set_dataset_strategy', the element of argument "
"'dataset_strategy' must be tuple, but got the type : {} .".format(type(ele)))
for dim in ele:
if not isinstance(dim, int):
raise TypeError("The dim of each strategy value must be int type, "
"but got the type : {} .".format(type(dim)))
raise TypeError("For 'set_auto_parallel_context().set_dataset_strategy', the element of argument "
"'dataset_strategy' must be int type, but got the type : {} .".format(type(dim)))
self._dataset_strategy_using_str = False
self._context_handle.set_dataset_strategy(dataset_strategy)
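
set_dataset_strategy above accepts either one of two preset strings or a tuple of per-input layouts. A compact standalone sketch of that acceptance logic (the _normalize_dataset_strategy name is hypothetical):

def _normalize_dataset_strategy(dataset_strategy):
    # String form: only the two presets are allowed.
    if isinstance(dataset_strategy, str):
        if dataset_strategy not in ("full_batch", "data_parallel"):
            raise ValueError("'dataset_strategy' must be 'full_batch' or 'data_parallel', "
                             "but got the value : {}.".format(dataset_strategy))
        return dataset_strategy
    # Tuple form: a tuple of tuples of ints, one inner tuple per input tensor.
    if not isinstance(dataset_strategy, tuple):
        raise TypeError("'dataset_strategy' must be str or tuple type, "
                        "but got the type : {}.".format(type(dataset_strategy)))
    for ele in dataset_strategy:
        if not isinstance(ele, tuple):
            raise TypeError("the element of 'dataset_strategy' must be tuple, "
                            "but got the type : {} .".format(type(ele)))
        for dim in ele:
            if not isinstance(dim, int):
                raise TypeError("each dimension in 'dataset_strategy' must be int type, "
                                "but got the type : {} .".format(type(dim)))
    return dataset_strategy

_normalize_dataset_strategy("full_batch")        # ok
_normalize_dataset_strategy(((8, 1), (8, 1)))    # ok: a per-input layout, e.g. splitting dimension 0 eight ways
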
@ -517,22 +521,24 @@ class _AutoParallelContext:
"""
self.check_context_handle()
if not indices:
raise ValueError("The parameter 'indices' can not be empty")
raise ValueError("For 'set_auto_parallel_context().set_all_reduce_fusion_split_indices', "
"the argument 'indices' can not be empty")
if isinstance(indices, (list)):
for index in indices:
if not isinstance(index, int) or isinstance(index, bool):
raise TypeError("The type of parameter 'index' must be int, but got the type : {} ."
.format(type(index)))
raise TypeError("For 'set_auto_parallel_context().set_all_reduce_fusion_split_indices', "
"the argument 'index' must be int, but got the type : {} .".format(type(index)))
else:
raise TypeError("The type of parameter 'indices' must be a python list, but got the type : {} ."
.format(type(indices)))
raise TypeError("For 'set_auto_parallel_context().set_all_reduce_fusion_split_indices', "
"the argument 'indices' must be list, but got the type : {} .".format(type(indices)))
if len(set(indices)) != len(indices):
raise ValueError("The indices has duplicate elements")
if sorted(indices) != indices:
raise ValueError("The elements in indices must be sorted in ascending order")
raise ValueError("For 'set_auto_parallel_context().set_all_reduce_fusion_split_indices', "
"the elements in argument 'indices' must be sorted in ascending order")
new_group = self._check_and_default_group(group)
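
The indices validation above applies four guards in sequence: non-empty, a list of ints (with bool excluded), no duplicates, and ascending order. A standalone restatement of those guards (the _check_fusion_indices helper is hypothetical):

def _check_fusion_indices(indices):
    # Same guards as set_all_reduce_fusion_split_indices above.
    if not indices:
        raise ValueError("the argument 'indices' can not be empty")
    if not isinstance(indices, list):
        raise TypeError("the argument 'indices' must be list, but got the type : {} .".format(type(indices)))
    for index in indices:
        if not isinstance(index, int) or isinstance(index, bool):
            raise TypeError("the elements of the argument 'indices' must be int, "
                            "but got the type : {} .".format(type(index)))
    if len(set(indices)) != len(indices):
        raise ValueError("The indices has duplicate elements")
    if sorted(indices) != indices:
        raise ValueError("the elements in argument 'indices' must be sorted in ascending order")

_check_fusion_indices([20, 35, 50])   # ok
# _check_fusion_indices([35, 20])     # raises ValueError (not ascending)
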
@ -573,10 +579,11 @@ class _AutoParallelContext:
if isinstance(sizes, (list)):
for size in sizes:
if not isinstance(size, int) or isinstance(size, bool):
raise TypeError("The type of size must be int, but got the type : {}.".format(type(size)))
raise TypeError("For 'set_auto_parallel_context().set_all_reduce_fusion_split_sizes', "
"the argument 'sizes' must be int, but got the type : {}.".format(type(size)))
else:
raise TypeError("The type of parameter 'sizes' must be a python list, but got the type : {}."
.format(type(sizes)))
raise TypeError("For 'set_auto_parallel_context().set_all_reduce_fusion_split_sizes', "
"the argument 'sizes' must be list, but got the type : {}.".format(type(sizes)))
new_group = self._check_and_default_group(group)
self._context_handle.set_all_reduce_fusion_split_sizes(sizes, new_group)
@ -609,8 +616,9 @@ class _AutoParallelContext:
"""
self.check_context_handle()
if not isinstance(enable_all_reduce_fusion, bool):
raise TypeError("The type of parameter 'enable_all_reduce_fusion' must be bool, "
"but got the type : {}.".format(type(enable_all_reduce_fusion)))
raise TypeError("For 'set_auto_parallel_context().set_enable_all_reduce_fusion', "
"the argument 'enable_all_reduce_fusion' must be bool, but got the type : {}."
.format(type(enable_all_reduce_fusion)))
self._context_handle.set_enable_all_reduce_fusion(enable_all_reduce_fusion)
def get_enable_all_reduce_fusion(self):
@ -637,8 +645,9 @@ class _AutoParallelContext:
"""
self.check_context_handle()
if not isinstance(enable_parallel_optimizer, bool):
raise TypeError("The type of parameter 'enable_parallel_optimizer' must be bool, "
"but got the type : {}.".format(type(enable_parallel_optimizer)))
raise TypeError("For 'set_auto_parallel_context().set_enable_parallel_optimizer', "
"the argument 'enable_parallel_optimizer' must be bool, but got the type : {}."
.format(type(enable_parallel_optimizer)))
self._context_handle.set_enable_parallel_optimizer(enable_parallel_optimizer)
def get_enable_parallel_optimizer(self):
@ -691,8 +700,8 @@ class _AutoParallelContext:
"""
self.check_context_handle()
if not isinstance(enable_a2a, bool):
raise TypeError("The type of parameter 'enable_a2a' must be bool, "
"but got the type : {}.".format(type(enable_a2a)))
raise TypeError("For 'set_auto_parallel_context().set_enable_alltoall', the argument 'enable_a2a' "
"must be bool, but got the type : {}.".format(type(enable_a2a)))
self._context_handle.set_enable_alltoall(enable_a2a)
def get_enable_alltoall(self):
@ -711,14 +720,16 @@ class _AutoParallelContext:
ValueError: If parallel mode is not supported.
"""
if not isinstance(communi_parallel_mode, str):
raise TypeError("The type of parameter 'communi_parallel_mode' must be str, "
"but got the type : {}.".format(type(communi_parallel_mode)))
raise TypeError("For 'set_auto_parallel_context().set_communi_parallel_mode', "
"the argument 'communi_parallel_mode' must be str, but got the type : {}."
.format(type(communi_parallel_mode)))
self.check_context_handle()
ret = self._context_handle.set_communi_parallel_mode(communi_parallel_mode)
if ret is False:
raise ValueError("The parameter 'communi_parallel_mode' only support 'ALL_GROUP_PARALLEL', "
"'SAME_SEVER_GROUP_PARALLEL' and 'NO_GROUP_PARALLEL', but got the value : {}."
.format(communi_parallel_mode))
raise ValueError("For 'set_auto_parallel_context().set_communi_parallel_mode', "
"the argument 'communi_parallel_mode' only support 'ALL_GROUP_PARALLEL', "
"'SAME_SEVER_GROUP_PARALLEL' and 'NO_GROUP_PARALLEL', "
"but got the value : {}.".format(communi_parallel_mode))
def get_communi_parallel_mode(self):
"""Get communication parallel mode."""

@ -250,11 +250,11 @@ class _CostModelContext:
ValueError: If context handle is none, or phase is not in {0, 1}.
"""
if not isinstance(phase, int) or isinstance(phase, bool):
raise TypeError(f"The type of parameter 'communi_const' must be int, but got {type(phase)}.")
raise TypeError(f"For 'set_run_phase', the argument 'communi_const' must be int, but got {type(phase)}.")
if self._context_handle is None:
raise ValueError("Context handle is none in context!!!")
if phase not in (0, 1):
raise ValueError("The parameter of 'phase' must be '0' or '1', but got {}".format(phase))
raise ValueError("For 'set_run_phase', the argument 'phase' must be '0' or '1', but got {}".format(phase))
self._context_handle.set_run_phase(phase)
def get_run_phase(self):
@ -279,7 +279,8 @@ class _CostModelContext:
ValueError: If context handle is none.
"""
if not isinstance(single_loop, bool):
raise TypeError(f"The type of parameter 'single_loop' must be bool, but got {type(single_loop)}.")
raise TypeError("For 'set_dp_algo_single_loop', the argument 'single_loop' must be bool, "
"but got the type : {}".format(type(single_loop)))
if self._context_handle is None:
raise ValueError("Context handle is none in context!!!")
self._context_handle.set_dp_algo_single_loop(single_loop)

@ -522,14 +522,16 @@ class FixedSparseAttention(nn.Cell):
self.parallel_config = parallel_config
size_per_head_list = [64, 128]
if self.seq_length != 1024:
raise ValueError("The parameter of 'seq_length' must be 1024, but got the value : {}.".format(seq_length))
raise ValueError("For 'FixedSparseAttention', the class variable 'seq_length' must be 1024, "
"but got the value : {}.".format(seq_length))
if self.block_size != 64:
raise ValueError("The parameter of 'block_size' must be 64, but got the value : {}.".format(block_size))
raise ValueError("For 'FixedSparseAttention', the class variable 'block_size' must be 64, "
"but got the value : {}.".format(block_size))
if num_different_global_patterns != 4:
raise ValueError("The parameter of 'num_different_global_patterns' must be 4, "
"but got the value : {}".format(num_different_global_patterns))
raise ValueError("For 'FixedSparseAttention', the class variable 'num_different_global_patterns' "
"must be 4, but got the value : {}".format(num_different_global_patterns))
if self.size_per_head not in size_per_head_list:
raise ValueError("The parameter of 'size_per_head' only supports {}, "
raise ValueError("For 'FixedSparseAttention', the class variable 'size_per_head' only supports {}, "
"but got the value : {}.".format(size_per_head_list, self.size_per_head))
local_ones = np.ones((self.block_size, self.block_size),
dtype=np.float16)

@ -66,7 +66,7 @@ class CrossEntropyLoss(Cell):
def __init__(self, parallel_config=default_dpmp_config):
super(CrossEntropyLoss, self).__init__()
if not isinstance(parallel_config, OpParallelConfig):
raise TypeError("The type of parameter 'parallel_config' must be OpParallelConfig, "
raise TypeError("For 'CrossEntropyLoss', the class variable 'parallel_config' must be OpParallelConfig, "
"but got the type: {}.".format(type(parallel_config)))
dp = parallel_config.data_parallel
mp = parallel_config.model_parallel

@ -422,15 +422,15 @@ class FeedForward(Cell):
dp = parallel_config.data_parallel
mp = parallel_config.model_parallel
if ffn_hidden_size % mp != 0:
raise ValueError("The parameter of 'ffn_hidden_size' must be a multiple of the model parallel way, "
"but got the ffn_hidden_size is {} and the num of model parallel is {}."
raise ValueError("For 'FeedForward', the class variable 'ffn_hidden_size' must be a multiple of the num of "
"model parallel, but got the ffn_hidden_size is {} and the num of model parallel is {}."
.format(ffn_hidden_size, mp))
if hidden_size % mp != 0:
raise ValueError("The parameter of 'hidden_size' must be a multiple of the model parallel way, "
"but got the hidden_size is {} and the num of model parallel is {}."
raise ValueError("For 'FeedForward', the class variable 'hidden_size' must be a multiple of the num of "
"model parallel, but got the hidden_size is {} and the num of model parallel is {}."
.format(hidden_size, mp))
if dropout_rate < 0 or dropout_rate >= 1:
raise ValueError("The parameter of 'dropout_rate' must be in the range [0, 1.0), "
raise ValueError("For 'FeedForward', the class variable 'dropout_rate' must be in the range [0, 1.0), "
"but got the value : {}.".format(dropout_rate))
input_size = hidden_size
output_size = ffn_hidden_size
@ -803,21 +803,24 @@ class MultiHeadAttention(Cell):
self.hidden_size = hidden_size
self.batch_size = batch_size
if hidden_dropout_rate < 0 or hidden_dropout_rate >= 1:
raise ValueError("The parameter 'hidden_dropout_rate' must be in range [0, 1.0), "
"but got the value : {}.".format(hidden_dropout_rate))
raise ValueError("For 'MultiHeadAttention', the class variable 'hidden_dropout_rate' must be "
"in range [0, 1.0), but got the value : {}.".format(hidden_dropout_rate))
if attention_dropout_rate < 0 or attention_dropout_rate >= 1:
raise ValueError("The parameter 'attention_dropout_rate' must be in range [0, 1.0), "
"but got the value : {}.".format(attention_dropout_rate))
raise ValueError("For 'MultiHeadAttention', the class variable 'attention_dropout_rate' must be "
"in range [0, 1.0), but got the value : {}.".format(attention_dropout_rate))
if hidden_size % num_heads != 0:
raise ValueError("The parameter 'hidden_size' should be a multiple of 'num_heads', "
"but got the hidden_size is {} and the num_heads is {}.".format(hidden_size, num_heads))
raise ValueError("For 'MultiHeadAttention', the class variable 'hidden_size' should be a multiple "
"of 'num_heads', but got the hidden_size is {} and the num_heads is {}."
.format(hidden_size, num_heads))
if num_heads % parallel_config.model_parallel != 0:
raise ValueError("The parameter 'num_heads' must be a multiple of 'parallel_config.model_parallel', "
"but got the num_heads is {} and the parallel_config.model_parallel is {}."
raise ValueError("For 'MultiHeadAttention', the class variable 'num_heads' must be a multiple of "
"'parallel_config.model_parallel', but got the num_heads is {} "
"and the parallel_config.model_parallel is {}."
.format(num_heads, parallel_config.model_parallel))
if self.is_parallel_mode and batch_size % parallel_config.data_parallel != 0:
raise ValueError("The parameter 'batch_size' must be a multiple of 'parallel_config.data_parallel', "
"but got the batch_size is {} and the parallel_config.data_parallel is {}."
raise ValueError("For 'MultiHeadAttention', the class variable 'batch_size' must be a multiple of "
"'parallel_config.data_parallel', but got the batch_size is {} "
"and the parallel_config.data_parallel is {}."
.format(batch_size, parallel_config.data_parallel))
self.is_first_iteration = True
# Output layer
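
The MultiHeadAttention checks above encode three divisibility constraints. With a concrete configuration they amount to the arithmetic below; the numbers are made up for illustration and do not come from the repository:

hidden_size, num_heads = 1024, 16
model_parallel, data_parallel = 8, 4
batch_size = 32

# hidden_size must split evenly across the heads ...
assert hidden_size % num_heads == 0        # 1024 / 16 = 64 per head
# ... the heads must split evenly across model-parallel devices ...
assert num_heads % model_parallel == 0     # 16 / 8 = 2 heads per device
# ... and the batch must split evenly across data-parallel devices.
assert batch_size % data_parallel == 0     # 32 / 4 = 8 samples per device
print("size_per_head =", hidden_size // num_heads)
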
@ -1272,17 +1275,18 @@ class TransformerEncoderLayer(Cell):
super(TransformerEncoderLayer, self).__init__()
_check_config(parallel_config)
if num_heads % parallel_config.model_parallel != 0:
raise ValueError("The parameter of 'num_heads' must be divisibled by the "
raise ValueError("For 'TransformerEncoderLayer', the class variable 'num_heads' must be divisibled by the "
"'parallel_config.model_parallel', but got the num_heads is {} and "
"parallel_config.model_parallel is {}.".format(num_heads, parallel_config.model_parallel))
if hidden_size % parallel_config.model_parallel != 0:
raise ValueError("The parameter of 'hidden_size' must be divisibled by the "
"'parallel_config.model_parallel', but got the hidden_size is {} and parallel_config. "
raise ValueError("For 'TransformerEncoderLayer', the class variable 'hidden_size' must be divisibled by "
"the 'parallel_config.model_parallel', but got the hidden_size is {} and parallel_config. "
"model_parallel is {}.".format(hidden_size, parallel_config.model_parallel))
if ffn_hidden_size % parallel_config.model_parallel != 0:
raise ValueError("The parameter of 'ffn_hidden_size' must be divisibled by the "
"'parallel_config.model_parallel', but got the ffn_hidden_size is {} and parallel_config. "
"model_parallel is {}.".format(ffn_hidden_size, parallel_config.model_parallel))
raise ValueError("For 'TransformerEncoderLayer', the class variable 'ffn_hidden_size' must be divisibled "
"by the 'parallel_config.model_parallel', but got the ffn_hidden_size is {} "
"and parallel_config. model_parallel is {}."
.format(ffn_hidden_size, parallel_config.model_parallel))
self.use_past = use_past
self.seq_length = seq_length
self.hidden_size = hidden_size
@ -1571,16 +1575,17 @@ class TransformerDecoderLayer(Cell):
super(TransformerDecoderLayer, self).__init__()
_check_config(parallel_config)
if num_heads % parallel_config.model_parallel != 0:
raise ValueError("The parameter of 'num_heads' must be divisibled by 'parallel_config.model_parallel', "
"but got the num_heads is {} and parallel_config.model_parallel is {}."
.format(num_heads, parallel_config.model_parallel))
raise ValueError("For 'TransformerDecoderLayer', the class variable 'num_heads' must be divisibled by "
"'parallel_config.model_parallel', but got the num_heads is {} and "
"parallel_config.model_parallel is {}.".format(num_heads, parallel_config.model_parallel))
if hidden_size % parallel_config.model_parallel != 0:
raise ValueError("The parameter of 'hidden_size' must be divisibled by 'parallel_config.model_parallel', "
"but got the hidden_size is {} and parallel_config.model_parallel is {}."
raise ValueError("For 'TransformerDecoderLayer', the class variable 'hidden_size' must be divisibled by "
"'parallel_config.model_parallel', but got the hidden_size is {} and "
"parallel_config.model_parallel is {}."
.format(hidden_size, parallel_config.model_parallel))
if ffn_hidden_size % parallel_config.model_parallel != 0:
raise ValueError("The parameter of 'ffn_hidden_size' must be divisibled by "
"'parallel_config.model_parallel', but got the ffn_hidden_size is {} "
raise ValueError("For 'TransformerDecoderLayer', the class variable 'ffn_hidden_size' must be "
"divisibled by 'parallel_config.model_parallel', but got the ffn_hidden_size is {} "
"and parallel_config.model_parallel is {}."
.format(ffn_hidden_size, parallel_config.model_parallel))
if use_past is True: