!24081 modify parallel communication note

Merge pull request !24081 from lilei/modify_parallel_note
i-robot 2021-09-26 10:17:03 +00:00 committed by Gitee
commit ecb5010433
3 changed files with 23 additions and 9 deletions


@@ -58,17 +58,17 @@ def init_to_value(init):
class Parameter(Tensor_):
"""
A object holding weights of cells, after initialized `Parameter` is a subtype of `Tensor`.
An object holding weights of cells, after initialized `Parameter` is a subtype of `Tensor`.
Note:
In auto_parallel mode of "semi_auto_parallel" and "auto_parallel", if init `Parameter` by
an `Tensor`, the type of Parameter will be `Tensor`. `Tensor`
a `Tensor`, the type of Parameter will be `Tensor`. `Tensor`
will save the shape and type info of a tensor with no memory usage. The shape can be changed while
compiling for auto-parallel. Call `init_data` will return a Tensor Parameter with initialized data.
If there is an operator in the network that requires part of the inputs to be Parameter,
then the Parameters as this part of the inputs are not allowed to be cast.
It is recommended to use the default value of `name` when initialize a parameter as one attribute of a cell,
otherwise, the parameter name may be different than expected.
otherwise, the parameter name may be different from expected.
Args:
default_input (Union[Tensor, int, float, numpy.ndarray, list]): Parameter data,
@@ -255,7 +255,7 @@ class Parameter(Tensor_):
"""
Get the new parameter after call the init_data.
Default is a None, If `self` is a Parameter with out data, after call the
Default is a None, If `self` is a Parameter without data, after call the
`init_data` the initialized Parameter with data will be recorded here.
"""
return self._inited_param
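The note above is easier to follow with a small sketch (not part of this diff) of the two initialization paths; it assumes a standard MindSpore install and the public initializer helper:

import numpy as np
from mindspore import Tensor, Parameter
from mindspore.common.initializer import initializer

# Built from a Tensor that already holds data: materialized immediately.
weight = Parameter(Tensor(np.ones((2, 3), dtype=np.float32)), name="w")

# Built from initializer(), which returns a Tensor that only records shape and
# dtype; under "semi_auto_parallel"/"auto_parallel" the shape may still change
# while compiling, and init_data() materializes the actual values.
lazy_weight = Parameter(initializer("normal", [2, 3]), name="w_lazy")
inited = lazy_weight.init_data()  # the initialized Parameter, also recorded in inited_param
print(inited.shape)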


@@ -91,7 +91,7 @@ DEFAULT_BACKEND = Backend("hccl")
class GlobalComm:
"""
World communication information. The GlobalComm is a global class. The members contains: BACKEND, WORLD_COMM_GROUP.
World communication information. The GlobalComm is a global class. The members contain: BACKEND, WORLD_COMM_GROUP.
"""
BACKEND = DEFAULT_BACKEND
WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
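A minimal sketch (not part of this change) of how the two members are usually read; the printed values assume the default Ascend/HCCL configuration shown above:

from mindspore.communication.management import init, GlobalComm

init()                              # HCCL on Ascend, NCCL on GPU when no backend is passed
print(GlobalComm.BACKEND)           # the configured backend, hccl by default here
print(GlobalComm.WORLD_COMM_GROUP)  # the built-in world group, e.g. hccl_world_group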


@@ -86,8 +86,8 @@ def init(backend_name=None):
This method should be used after set_context.
Args:
backend_name (str): Backend, using HCCL/NCCL. If the `backend_name` is None, system will
recognize `device_target` by devices. Default: None.
backend_name (str): Backend, using HCCL/NCCL. If the `backend_name` is None, system will recognize
`device_target` by devices. Default: None.
Raises:
TypeError: If `backend_name` is not a string.
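A minimal usage sketch for the behaviour described above (illustrative, not from the patch); it assumes the process was started by the launcher the backend expects, e.g. mpirun for NCCL or a rank table for HCCL:

from mindspore.context import set_context
from mindspore.communication import init

set_context(device_target="GPU")
init()          # backend_name=None, so NCCL is inferred from device_target
# init("nccl")  # equivalent explicit call; init("hccl") would be used on Ascend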
@@ -176,7 +176,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
ValueError: If backend is invalid.
RuntimeError: If HCCL/NCCL is not available.
>>> from mindspore.communication import init, get_rank()
>>> from mindspore.communication import init, get_rank
>>> init()
>>> rank_id = get_rank()
>>> print(rank_id)
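A slightly expanded sketch (illustrative only; a single world group launched with one process per device is assumed):

from mindspore.communication import init, get_rank, get_group_size

init()
rank_id = get_rank()              # 0 .. get_group_size() - 1 within the world group
print(rank_id, get_group_size())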
@@ -208,11 +208,12 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
RuntimeError: If HCCL is not available or MindSpore is GPU version.
Examples:
>>> from mindspore.context import set_context
>>> from mindspore.communication.management import init, get_rank, get_local_rank
>>> set_context(device_target="Ascend", device_num=16) # 2 server, each server with 8 NPU.
>>> init()
>>> world_rank = get_rank() # rank_id is 9.
>>> local_rank = get_local_rank()
>>> print("local_rank is: {}, world_rank is {}"%(local_rank, world_rank))
>>> print("local_rank is: {}, world_rank is {}".format(local_rank, world_rank))
local_rank is: 1, world_rank is 9
"""
if not isinstance(group, str):
@@ -238,6 +239,15 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
TypeError: If group is not a string.
ValueError: If backend is invalid.
RuntimeError: If HCCL/NCCL is not available.
Examples:
>>> from mindspore.context import set_context
>>> from mindspore.communication.management import init, get_group_size
>>> set_context(device_target="Ascend", device_num=8)
>>> init()
>>> group_size = get_group_size()
>>> print("group_size is: ", group_size)
group_size is: 8
"""
if not isinstance(group, str):
raise TypeError("Group name must be a string, but got {}".format(type(group)))
@@ -265,6 +275,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
RuntimeError: If HCCL is not available or MindSpore is GPU version.
Examples:
>>> from mindspore.context import set_context
>>> from mindspore.communication.management import init, get_local_rank_size
>>> set_context(device_target="Ascend", device_num=16) # 2 server, each server with 8 NPU.
>>> init()
>>> local_rank_size = get_local_rank_size()
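For completeness, a sketch of the value this call returns in the two-server layout assumed above (illustrative, not from the patch):

from mindspore.communication.management import init, get_local_rank_size

init()
local_rank_size = get_local_rank_size()  # ranks on the calling server only, 8 of the 16 here
print(local_rank_size)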
@@ -300,6 +311,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):
Examples:
>>> from mindspore.context import set_context
>>> from mindspore.communication.management import init, create_group, get_world_rank_from_group_rank
>>> set_context(device_target="Ascend")
>>> init()
>>> group = "0-4"
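A self-contained sketch of the group-rank to world-rank mapping; the group name and rank_ids below are hypothetical and an Ascend/HCCL setup is assumed:

from mindspore.communication.management import init, create_group, get_world_rank_from_group_rank

init()
create_group("0-4", [0, 2, 4, 6])                # hypothetical group of four world ranks
print(get_world_rank_from_group_rank("0-4", 1))  # group rank 1 maps to world rank 2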
@@ -338,6 +350,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):
Examples:
>>> from mindspore.context import set_context
>>> from mindspore.communication.management import init, create_group, get_group_rank_from_world_rank
>>> set_context(device_target="Ascend")
>>> init()
>>> group = "0-4"
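The inverse mapping, sketched with the same hypothetical group (illustrative only):

from mindspore.communication.management import init, create_group, get_group_rank_from_world_rank

init()
create_group("0-4", [0, 2, 4, 6])
print(get_group_rank_from_world_rank(6, "0-4"))  # world rank 6 is group rank 3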
@@ -377,6 +390,7 @@ def create_group(group, rank_ids):
Examples:
>>> from mindspore.context import set_context
>>> from mindspore.ops import operations as ops
>>> from mindspore.communication.management import init, create_group
>>> set_context(device_target="Ascend")
>>> init()
>>> group = "0-8"
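A short end-to-end sketch (not from the patch): the group name and rank_ids are hypothetical, an Ascend/HCCL setup is assumed, and the created group is bound to a communication operator:

from mindspore.context import set_context
from mindspore.communication.management import init, create_group
from mindspore.ops import operations as ops

set_context(device_target="Ascend")
init()
create_group("0-3", [0, 1, 2, 3])
allreduce_sum = ops.AllReduce(group="0-3")  # communication restricted to ranks 0, 1, 2, 3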