From 13841a5f90f1a8d44a972e80b9f1b7245ae08d8e Mon Sep 17 00:00:00 2001
From: lilei
Date: Sat, 25 Sep 2021 11:10:45 +0800
Subject: [PATCH] modify parallel communication note

---
 mindspore/common/parameter.py           |  8 ++++----
 mindspore/communication/_comm_helper.py |  2 +-
 mindspore/communication/management.py   | 22 ++++++++++++++++++----
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py
index d6fab949a11..30128970d14 100644
--- a/mindspore/common/parameter.py
+++ b/mindspore/common/parameter.py
@@ -58,17 +58,17 @@ def init_to_value(init):
 class Parameter(Tensor_):
     """
-    A object holding weights of cells, after initialized `Parameter` is a subtype of `Tensor`.
+    An object holding weights of cells. After initialization, `Parameter` is a subtype of `Tensor`.
 
     Note:
         In auto_parallel mode of "semi_auto_parallel" and "auto_parallel", if init `Parameter` by
-        an `Tensor`, the type of Parameter will be `Tensor`. `Tensor`
+        a `Tensor`, the type of Parameter will be `Tensor`. `Tensor`
         will save the shape and type info of a tensor with no memory usage. The shape can be changed
         while compiling for auto-parallel. Call `init_data` will return a Tensor Parameter with
         initialized data.
         If there is an operator in the network that requires part of the inputs to be Parameter,
         then the Parameters as this part of the inputs are not allowed to be cast.
         It is recommended to use the default value of `name` when initialize a parameter as one attribute of a cell,
-        otherwise, the parameter name may be different than expected.
+        otherwise, the parameter name may be different from what is expected.
 
     Args:
         default_input (Union[Tensor, int, float, numpy.ndarray, list]): Parameter data,
@@ -255,7 +255,7 @@ class Parameter(Tensor_):
         """
         Get the new parameter after call the init_data.
 
-        Default is a None, If `self` is a Parameter with out data, after call the
+        Default is None. If `self` is a Parameter without data, after calling
         `init_data` the initialized Parameter with data will be recorded here.
         """
         return self._inited_param
diff --git a/mindspore/communication/_comm_helper.py b/mindspore/communication/_comm_helper.py
index 65be8b8f1ed..050939a95e2 100644
--- a/mindspore/communication/_comm_helper.py
+++ b/mindspore/communication/_comm_helper.py
@@ -91,7 +91,7 @@ DEFAULT_BACKEND = Backend("hccl")
 
 class GlobalComm:
     """
-    World communication information. The GlobalComm is a global class. The members contains: BACKEND, WORLD_COMM_GROUP.
+    World communication information. The GlobalComm is a global class. The members are: BACKEND, WORLD_COMM_GROUP.
     """
     BACKEND = DEFAULT_BACKEND
     WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
diff --git a/mindspore/communication/management.py b/mindspore/communication/management.py
index bf47ee0d9f5..9b111259b44 100755
--- a/mindspore/communication/management.py
+++ b/mindspore/communication/management.py
@@ -86,8 +86,8 @@ def init(backend_name=None):
     This method should be used after set_context.
 
     Args:
-        backend_name (str): Backend, using HCCL/NCCL. If the `backend_name` is None, system will
-            recognize `device_target` by devices. Default: None.
+        backend_name (str): Backend, using HCCL/NCCL. If the `backend_name` is None, the system will recognize
+            `device_target` by devices. Default: None.
 
     Raises:
         TypeError: If `backend_name` is not a string.
@@ -176,7 +176,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
         ValueError: If backend is invalid.
         RuntimeError: If HCCL/NCCL is not available.
 
-        >>> from mindspore.communication import init, get_rank()
+        >>> from mindspore.communication import init, get_rank
         >>> init()
         >>> rank_id = get_rank()
         >>> print(rank_id)
@@ -208,11 +208,12 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
         RuntimeError: If HCCL is not available or MindSpore is GPU version.
     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, get_rank, get_local_rank
         >>> set_context(device_target="Ascend", device_num=16) # 2 server, each server with 8 NPU.
         >>> init()
         >>> world_rank = get_rank() # rank_id is 9.
         >>> local_rank = get_local_rank()
-        >>> print("local_rank is: {}, world_rank is {}"%(local_rank, world_rank))
+        >>> print("local_rank is: {}, world_rank is {}".format(local_rank, world_rank))
         local_rank is: 1, world_rank is 9
     """
     if not isinstance(group, str):
         raise TypeError("Group name must be a string, but got {}".format(type(group)))
@@ -238,6 +239,15 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
         TypeError: If group is not a string.
         ValueError: If backend is invalid.
         RuntimeError: If HCCL/NCCL is not available.
+
+    Examples:
+        >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, get_group_size
+        >>> set_context(device_target="Ascend", device_num=8)
+        >>> init()
+        >>> group_size = get_group_size()
+        >>> print("group_size is: ", group_size)
+        group_size is: 8
     """
     if not isinstance(group, str):
         raise TypeError("Group name must be a string, but got {}".format(type(group)))
@@ -265,6 +275,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
         RuntimeError: If HCCL is not available or MindSpore is GPU version.
     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, get_local_rank_size
         >>> set_context(device_target="Ascend", device_num=16) # 2 server, each server with 8 NPU.
         >>> init()
         >>> local_rank_size = get_local_rank_size()
@@ -300,6 +311,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):
 
     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, create_group, get_world_rank_from_group_rank
         >>> set_context(device_target="Ascend")
         >>> init()
         >>> group = "0-4"
@@ -338,6 +350,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):
 
     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, create_group, get_group_rank_from_world_rank
         >>> set_context(device_target="Ascend")
         >>> init()
         >>> group = "0-4"
@@ -377,6 +390,7 @@ def create_group(group, rank_ids):
 
     Examples:
         >>> from mindspore.context import set_context
         >>> from mindspore.ops import operations as ops
+        >>> from mindspore.communication.management import init, create_group
         >>> set_context(device_target="Ascend")
         >>> init()
         >>> group = "0-8"
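
Taken together, the examples this patch adds to management.py all follow one pattern: configure the context, call init(), and only then query rank or group information. For reference, a minimal consolidated sketch of that pattern follows. It is not part of the patch; it assumes an Ascend environment with HCCL configured and one process launched per device, and the rank_ids chosen for the "0-4" group are illustrative only.

    # Minimal sketch of the call pattern documented in this patch.
    # Assumes an Ascend host with HCCL configured and one process per device;
    # the rank_ids for the "0-4" group below are illustrative.
    from mindspore.context import set_context
    from mindspore.communication.management import (
        init, get_rank, get_local_rank, get_group_size, create_group)

    set_context(device_target="Ascend", device_num=16)  # 2 servers, 8 NPUs each
    init()  # must be called after set_context; selects HCCL on Ascend

    world_rank = get_rank()        # rank in the world group, 0..15 here
    local_rank = get_local_rank()  # rank within the local server, 0..7 here
    group_size = get_group_size()  # devices in the world group, 16 here
    print("rank {}/{} (local rank {})".format(world_rank, group_size, local_rank))

    # Create a user-defined communication group over the first five world ranks.
    group = "0-4"
    create_group(group, [0, 1, 2, 3, 4])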