forked from mindspore-Ecosystem/mindspore
update the documentation of MaxPoolWithArgmax, BroadcastTo, GlobalBatchNorm and DistributedGradReducer operators.
This commit is contained in:
parent 882a582eb4
commit 0f75370377
@@ -575,7 +575,7 @@ class GlobalBatchNorm(_BatchNorm):
 >>>
 >>> device_id = int(os.environ["DEVICE_ID"])
 >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True,
->>>                     device_id=int(device_id))
+...                     device_id=int(device_id))
 >>> init()
 >>> context.reset_auto_parallel_context()
 >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
@@ -268,47 +268,45 @@ class DistributedGradReducer(Cell):
 >>> context.reset_auto_parallel_context()
 >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
 >>>
->>>
 >>> class TrainingWrapper(nn.Cell):
->>>     def __init__(self, network, optimizer, sens=1.0):
->>>         super(TrainingWrapper, self).__init__(auto_prefix=False)
->>>         self.network = network
->>>         self.network.add_flags(defer_inline=True)
->>>         self.weights = optimizer.parameters
->>>         self.optimizer = optimizer
->>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
->>>         self.sens = sens
->>>         self.reducer_flag = False
->>>         self.grad_reducer = None
->>>         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
->>>         if self.parallel_mode in [ParallelMode.DATA_PARALLEL,
->>>                                   ParallelMode.HYBRID_PARALLEL]:
->>>             self.reducer_flag = True
->>>         if self.reducer_flag:
->>>             mean = _get_gradients_mean()
->>>             degree = _get_device_num()
->>>             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
->>>
->>>     def construct(self, *args):
->>>         weights = self.weights
->>>         loss = self.network(*args)
->>>         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
->>>         grads = self.grad(self.network, weights)(*args, sens)
->>>         if self.reducer_flag:
->>>             # apply grad reducer on grads
->>>             grads = self.grad_reducer(grads)
->>>         return F.depend(loss, self.optimizer(grads))
+...     def __init__(self, network, optimizer, sens=1.0):
+...         super(TrainingWrapper, self).__init__(auto_prefix=False)
+...         self.network = network
+...         self.network.add_flags(defer_inline=True)
+...         self.weights = optimizer.parameters
+...         self.optimizer = optimizer
+...         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
+...         self.sens = sens
+...         self.reducer_flag = False
+...         self.grad_reducer = None
+...         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
+...         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
+...             self.reducer_flag = True
+...         if self.reducer_flag:
+...             mean = _get_gradients_mean()
+...             degree = _get_device_num()
+...             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
+...
+...     def construct(self, *args):
+...         weights = self.weights
+...         loss = self.network(*args)
+...         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
+...         grads = self.grad(self.network, weights)(*args, sens)
+...         if self.reducer_flag:
+...             # apply grad reducer on grads
+...             grads = self.grad_reducer(grads)
+...         return F.depend(loss, self.optimizer(grads))
 >>>
 >>> class Net(nn.Cell):
->>>     def __init__(self, in_features, out_features):
->>>         super(Net, self).__init__()
->>>         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
->>>                                 name='weight')
->>>         self.matmul = P.MatMul()
->>>
->>>     def construct(self, x):
->>>         output = self.matmul(x, self.weight)
->>>         return output
+...     def __init__(self, in_features, out_features):
+...         super(Net, self).__init__()
+...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
+...                                 name='weight')
+...         self.matmul = P.MatMul()
+...
+...     def construct(self, x):
+...         output = self.matmul(x, self.weight)
+...         return output
 >>>
 >>> size, in_features, out_features = 16, 16, 10
 >>> network = Net(in_features, out_features)
@@ -4105,12 +4105,12 @@ class BroadcastTo(PrimitiveWithInfer):
     When input shape is broadcast to target shape, it starts with the trailing dimensions.
 
     Raises:
-        ValueError: Given a shape tuple, if it has several -1s; or if the -1 is in an invalid position
-            such as one that does not have a opposing dimension in an input tensor; of if the target and
+        ValueError: Given a shape tuple, if it has several -1; or if the -1 is in an invalid position
+            such as one that does not have a opposing dimension in an input tensor; or if the target and
             input shapes are incompatiable.
 
     Args:
-        shape (tuple): The target shape to broadcast. Can be fully specified, or have '-1's in one position
+        shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
            where it will be substituted by the input tensor's shape in that position, see example.
 
     Inputs:
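
For context, a minimal doctest-style sketch of the -1 substitution documented above; it assumes the same `P`, `Tensor`, and `np` aliases used in the other docstring examples in this file and is not part of the commit:

>>> input_x = Tensor(np.ones((1, 3)).astype(np.float32))
>>> out_full = P.BroadcastTo((2, 3))(input_x)   # fully specified target -> output shape (2, 3)
>>> out_keep = P.BroadcastTo((-1, 3))(input_x)  # -1 is substituted by the input's size there -> output shape (1, 3)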
@@ -1566,7 +1566,8 @@ class MaxPoolWithArgmax(_Pool):
     Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
 
     - **output** (Tensor) - Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
-    - **mask** (Tensor) - Max values' index represented by the mask.
+      It has the same data type as `input`.
+    - **mask** (Tensor) - Max values' index represented by the mask. Data type is int32.
 
     Supported Platforms:
         ``Ascend``
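
For reference, a hedged doctest-style sketch of the two outputs documented above; the `ksize`/`strides` argument names and the `P`, `Tensor`, and `np` aliases are assumed from the surrounding examples and this era of the API, and are not part of the commit:

>>> x = Tensor(np.arange(1 * 3 * 4 * 4).reshape((1, 3, 4, 4)).astype(np.float32))
>>> maxpool_argmax = P.MaxPoolWithArgmax(ksize=2, strides=2)
>>> output, mask = maxpool_argmax(x)
>>> # output: pooled values, shape (1, 3, 2, 2), same data type as the input
>>> # mask:   int32 tensor locating each maximum, as stated in the updated docstring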