forked from mindspore-Ecosystem/mindspore
!24081 modify parallel communication note
Merge pull request !24081 from lilei/modify_parallel_note
commit ecb5010433
@@ -58,17 +58,17 @@ def init_to_value(init):
 class Parameter(Tensor_):
     """
-    A object holding weights of cells, after initialized `Parameter` is a subtype of `Tensor`.
+    An object holding weights of cells, after initialized `Parameter` is a subtype of `Tensor`.

     Note:
         In auto_parallel mode of "semi_auto_parallel" and "auto_parallel", if init `Parameter` by
-        an `Tensor`, the type of Parameter will be `Tensor`. `Tensor`
+        a `Tensor`, the type of Parameter will be `Tensor`. `Tensor`
         will save the shape and type info of a tensor with no memory usage. The shape can be changed while
         compiling for auto-parallel. Call `init_data` will return a Tensor Parameter with initialized data.
         If there is an operator in the network that requires part of the inputs to be Parameter,
         then the Parameters as this part of the inputs are not allowed to be cast.
         It is recommended to use the default value of `name` when initialize a parameter as one attribute of a cell,
-        otherwise, the parameter name may be different than expected.
+        otherwise, the parameter name may be different from expected.

     Args:
         default_input (Union[Tensor, int, float, numpy.ndarray, list]): Parameter data,
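A minimal sketch of the deferred-initialization behavior this docstring describes, assuming the standard `initializer` helper (illustrative only, not part of the patch):

    from mindspore import Parameter
    from mindspore.common.initializer import initializer

    # Built from an initializer, the Parameter carries only shape and dtype
    # metadata, so no memory is used until the data is materialized.
    weight = Parameter(initializer('normal', [1024, 1024]), name='weight')
    weight = weight.init_data()  # returns a Parameter backed by real data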
@@ -255,7 +255,7 @@ class Parameter(Tensor_):
         """
         Get the new parameter after call the init_data.

-        Default is a None, If `self` is a Parameter with out data, after call the
+        Default is a None, If `self` is a Parameter without data, after call the
         `init_data` the initialized Parameter with data will be recorded here.
         """
         return self._inited_param
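To see the property in action, a small sketch (again assuming `initializer` builds a Parameter without data):

    from mindspore import Parameter
    from mindspore.common.initializer import initializer

    w = Parameter(initializer('zeros', [2, 2]), name='w')
    print(w.inited_param)  # None: no data has been materialized yet
    w.init_data()
    print(w.inited_param)  # the initialized Parameter recorded by init_data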
@@ -91,7 +91,7 @@ DEFAULT_BACKEND = Backend("hccl")

 class GlobalComm:
     """
-    World communication information. The GlobalComm is a global class. The members contains: BACKEND, WORLD_COMM_GROUP.
+    World communication information. The GlobalComm is a global class. The members contain: BACKEND, WORLD_COMM_GROUP.
     """
     BACKEND = DEFAULT_BACKEND
     WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
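Both members read back as plain class attributes; for instance (a sketch, actual values depend on the build and backend):

    from mindspore.communication.management import GlobalComm

    print(GlobalComm.BACKEND)           # "hccl" by default, per DEFAULT_BACKEND
    print(GlobalComm.WORLD_COMM_GROUP)  # name of the HCCL world group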
@@ -86,8 +86,8 @@ def init(backend_name=None):
     This method should be used after set_context.

     Args:
-        backend_name (str): Backend, using HCCL/NCCL. If the `backend_name` is None, system will
-            recognize `device_target` by devices. Default: None.
+        backend_name (str): Backend, using HCCL/NCCL. If the `backend_name` is None, system will recognize
+            `device_target` by devices. Default: None.

     Raises:
         TypeError: If `backend_name` is not a string.
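A brief usage sketch of the corrected wording: when `backend_name` is None, the backend is inferred from `device_target` (this assumes a correctly configured multi-device environment):

    from mindspore.context import set_context
    from mindspore.communication.management import init

    set_context(device_target="Ascend")
    init()          # backend_name is None, so HCCL is inferred from device_target
    # init("nccl")  # on GPU, the backend may also be named explicitly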
@@ -176,7 +176,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
         ValueError: If backend is invalid.
         RuntimeError: If HCCL/NCCL is not available.

-        >>> from mindspore.communication import init, get_rank()
+        >>> from mindspore.communication import init, get_rank
         >>> init()
         >>> rank_id = get_rank()
         >>> print(rank_id)
@@ -208,11 +208,12 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
         RuntimeError: If HCCL is not available or MindSpore is GPU version.
     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, get_rank, get_local_rank
         >>> set_context(device_target="Ascend", device_num=16) # 2 server, each server with 8 NPU.
         >>> init()
         >>> world_rank = get_rank() # rank_id is 9.
         >>> local_rank = get_local_rank()
-        >>> print("local_rank is: {}, world_rank is {}"%(local_rank, world_rank))
+        >>> print("local_rank is: {}, world_rank is {}".format(local_rank, world_rank))
         local_rank is: 1, world_rank is 9
     """
     if not isinstance(group, str):
@@ -238,6 +239,15 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
         TypeError: If group is not a string.
         ValueError: If backend is invalid.
         RuntimeError: If HCCL/NCCL is not available.

+    Examples:
+        >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, get_group_size
+        >>> set_context(device_target="Ascend", device_num=8)
+        >>> init()
+        >>> group_size = get_group_size()
+        >>> print("group_size is: ", group_size)
+        group_size is: 8
     """
     if not isinstance(group, str):
         raise TypeError("Group name must be a string, but got {}".format(type(group)))
@@ -265,6 +275,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
         RuntimeError: If HCCL is not available or MindSpore is GPU version.
     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, get_local_rank_size
         >>> set_context(device_target="Ascend", device_num=16) # 2 server, each server with 8 NPU.
         >>> init()
         >>> local_rank_size = get_local_rank_size()
@@ -300,6 +311,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):

     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, create_group, get_world_rank_from_group_rank
         >>> set_context(device_target="Ascend")
         >>> init()
         >>> group = "0-4"
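The example above is cut off by the hunk boundary; as a hypothetical continuation illustrating the mapping, suppose group "0-4" is created from world ranks [0, 4]; group rank 1 then maps back to world rank 4:

    create_group("0-4", [0, 4])                            # group of world ranks 0 and 4
    world_rank = get_world_rank_from_group_rank("0-4", 1)
    print(world_rank)                                      # 4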
@@ -338,6 +350,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):

     Examples:
         >>> from mindspore.context import set_context
+        >>> from mindspore.communication.management import init, create_group, get_group_rank_from_world_rank
         >>> set_context(device_target="Ascend")
         >>> init()
         >>> group = "0-4"
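And the inverse lookup (same hypothetical group): world rank 4 sits at position 1 inside "0-4":

    create_group("0-4", [0, 4])
    group_rank = get_group_rank_from_world_rank(4, "0-4")
    print(group_rank)  # 1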
@@ -377,6 +390,7 @@ def create_group(group, rank_ids):
     Examples:
         >>> from mindspore.context import set_context
         >>> from mindspore.ops import operations as ops
+        >>> from mindspore.communication.management import init, create_group
         >>> set_context(device_target="Ascend")
         >>> init()
         >>> group = "0-8"
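Continuing this truncated example hypothetically: `create_group` takes the group name plus the list of world ranks it should contain, and the name is then usable by communication operators:

    rank_ids = [0, 8]
    create_group(group, rank_ids)           # world ranks 0 and 8 form group "0-8"
    allreduce = ops.AllReduce(group=group)  # comm ops can target the new group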