Update the documentation of the MaxPoolWithArgmax, BroadcastTo, GlobalBatchNorm, and DistributedGradReducer operators.

wangshuide2020 2020-12-31 10:38:45 +08:00
parent 882a582eb4
commit 0f75370377
4 changed files with 42 additions and 43 deletions

@@ -575,7 +575,7 @@ class GlobalBatchNorm(_BatchNorm):
 >>>
 >>> device_id = int(os.environ["DEVICE_ID"])
 >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True,
->>>                     device_id=int(device_id))
+...                     device_id=int(device_id))
 >>> init()
 >>> context.reset_auto_parallel_context()
 >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
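
For context on this hunk: Python's doctest convention reserves the `>>>` prompt for the first line of a statement and uses the `...` prompt for continuation lines, which is why the wrapped `context.set_context(...)` call above now continues with `...`. A minimal sketch of the convention (the function `add` is illustrative, not part of the MindSpore docs):

>>> def add(a,
...         b):
...     # continuation and body lines carry the "..." prompt
...     return a + b
>>> add(1, 2)
3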

@@ -268,47 +268,45 @@ class DistributedGradReducer(Cell):
 >>> context.reset_auto_parallel_context()
 >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
 >>>
->>>
 >>> class TrainingWrapper(nn.Cell):
->>>     def __init__(self, network, optimizer, sens=1.0):
->>>         super(TrainingWrapper, self).__init__(auto_prefix=False)
->>>         self.network = network
->>>         self.network.add_flags(defer_inline=True)
->>>         self.weights = optimizer.parameters
->>>         self.optimizer = optimizer
->>>         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
->>>         self.sens = sens
->>>         self.reducer_flag = False
->>>         self.grad_reducer = None
->>>         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
->>>         if self.parallel_mode in [ParallelMode.DATA_PARALLEL,
->>>                                   ParallelMode.HYBRID_PARALLEL]:
->>>             self.reducer_flag = True
->>>         if self.reducer_flag:
->>>             mean = _get_gradients_mean()
->>>             degree = _get_device_num()
->>>             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
->>>
->>>     def construct(self, *args):
->>>         weights = self.weights
->>>         loss = self.network(*args)
->>>         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
->>>         grads = self.grad(self.network, weights)(*args, sens)
->>>         if self.reducer_flag:
->>>             # apply grad reducer on grads
->>>             grads = self.grad_reducer(grads)
->>>         return F.depend(loss, self.optimizer(grads))
+...     def __init__(self, network, optimizer, sens=1.0):
+...         super(TrainingWrapper, self).__init__(auto_prefix=False)
+...         self.network = network
+...         self.network.add_flags(defer_inline=True)
+...         self.weights = optimizer.parameters
+...         self.optimizer = optimizer
+...         self.grad = C.GradOperation(get_by_list=True, sens_param=True)
+...         self.sens = sens
+...         self.reducer_flag = False
+...         self.grad_reducer = None
+...         self.parallel_mode = context.get_auto_parallel_context("parallel_mode")
+...         if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
+...             self.reducer_flag = True
+...         if self.reducer_flag:
+...             mean = _get_gradients_mean()
+...             degree = _get_device_num()
+...             self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
+...
+...     def construct(self, *args):
+...         weights = self.weights
+...         loss = self.network(*args)
+...         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
+...         grads = self.grad(self.network, weights)(*args, sens)
+...         if self.reducer_flag:
+...             # apply grad reducer on grads
+...             grads = self.grad_reducer(grads)
+...         return F.depend(loss, self.optimizer(grads))
 >>>
 >>> class Net(nn.Cell):
->>>     def __init__(self, in_features, out_features):
->>>         super(Net, self).__init__()
->>>         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
->>>                                 name='weight')
->>>         self.matmul = P.MatMul()
->>>
->>>     def construct(self, x):
->>>         output = self.matmul(x, self.weight)
->>>         return output
+...     def __init__(self, in_features, out_features):
+...         super(Net, self).__init__()
+...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
+...                                 name='weight')
+...         self.matmul = P.MatMul()
+...
+...     def construct(self, x):
+...         output = self.matmul(x, self.weight)
+...         return output
 >>>
 >>> size, in_features, out_features = 16, 16, 10
 >>> network = Net(in_features, out_features)
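
The example above ends with the network's construction. Continuing from `network` as defined there, here is a hedged sketch of how the documented pieces could be driven through one training step; the loss function, optimizer, and data are our own illustrative choices, not part of this diff (assumes `import numpy as np` and `from mindspore import Tensor, nn` alongside the imports already used above):

>>> loss = nn.MSELoss()                                 # illustrative loss choice
>>> net_with_loss = nn.WithLossCell(network, loss)      # attach the loss to Net
>>> optimizer = nn.Momentum(net_with_loss.trainable_params(),
...                         learning_rate=0.1, momentum=0.9)
>>> train_cell = TrainingWrapper(net_with_loss, optimizer)
>>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32))
>>> label = Tensor(np.zeros([size, out_features]).astype(np.float32))
>>> loss_value = train_cell(inputs, label)              # forward, reduce grads, update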

@@ -4105,12 +4105,12 @@ class BroadcastTo(PrimitiveWithInfer):
 When input shape is broadcast to target shape, it starts with the trailing dimensions.
 Raises:
-    ValueError: Given a shape tuple, if it has several -1s; or if the -1 is in an invalid position
-        such as one that does not have a opposing dimension in an input tensor; of if the target and
+    ValueError: Given a shape tuple, if it has several -1; or if the -1 is in an invalid position
+        such as one that does not have an opposing dimension in an input tensor; or if the target and
     input shapes are incompatible.
 Args:
-    shape (tuple): The target shape to broadcast. Can be fully specified, or have '-1's in one position
+    shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
     where it will be substituted by the input tensor's shape in that position, see example.
 Inputs:
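
To make the -1 substitution described above concrete, a hedged sketch (shapes and values are illustrative, and the `P` alias matches the operations import used in the other examples in this commit):

>>> import numpy as np
>>> from mindspore import Tensor
>>> from mindspore.ops import operations as P
>>> x = Tensor(np.array([[1.0, 2.0, 3.0]], dtype=np.float32))   # shape (1, 3)
>>> P.BroadcastTo((2, 3))(x).shape    # fully specified target
(2, 3)
>>> P.BroadcastTo((2, -1))(x).shape   # -1 takes the input's dim in that position, here 3
(2, 3)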

@@ -1566,7 +1566,8 @@ class MaxPoolWithArgmax(_Pool):
 Tuple of 2 Tensors, representing the maxpool result and where the max values are generated.
 - **output** (Tensor) - Maxpooling result, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
-- **mask** (Tensor) - Max values' index represented by the mask.
+  It has the same data type as `input`.
+- **mask** (Tensor) - Max values' index represented by the mask. Data type is int32.
 Supported Platforms:
 ``Ascend``
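
A hedged illustration of the corrected output descriptions; the input and pooling parameters are illustrative, the op requires an Ascend device per the platform note above, and the keyword names (`kernel_size`, `strides`) follow the MindSpore 1.x signature, so treat them as an assumption:

>>> import numpy as np
>>> from mindspore import Tensor
>>> from mindspore.ops import operations as P
>>> pool = P.MaxPoolWithArgmax(kernel_size=2, strides=2)
>>> x = Tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))
>>> output, mask = pool(x)
>>> # output: shape (1, 1, 2, 2), same float32 dtype as x; mask: int32 indices of the max values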