diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py index 5ce7bd28232..7dd5ea8a5b0 100755 --- a/mindspore/nn/optim/momentum.py +++ b/mindspore/nn/optim/momentum.py @@ -58,13 +58,13 @@ class Momentum(Optimizer): If use_nesterov is True: .. math:: - p_{t} = grad \ast lr + v_{t} \ast u \ast lr + p_{t} = p_{t-1} - (grad \ast lr + v_{t} \ast u \ast lr) If use_nesterov is Flase: .. math:: - p_{t} = lr \ast v_{t} + p_{t} = p_{t-1} - lr \ast v_{t} - Here: where grad, lr, p, v and u denote the gradients, learning_rate, parameter, accum, and momentum respectively. + Here, grad, lr, p, v and u denote the gradients, learning_rate, params, moments, and momentum respectively. Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py index 406ad9f48dc..48011042151 100755 --- a/mindspore/nn/optim/sgd.py +++ b/mindspore/nn/optim/sgd.py @@ -57,7 +57,7 @@ class SGD(Optimizer): .. math:: p_{t+1} = p_{t} - lr \ast v_{t+1} - To be notice, for the first step, v_{t+1} = gradient + Note that for the first step, v_{t+1} = gradient Here : where p, v and u denote the parameters, accum, and momentum respectively. @@ -89,8 +89,8 @@ class SGD(Optimizer): momentum (float): A floating point value the momentum. should be at least 0.0. Default: 0.0. dampening (float): A floating point value of dampening for momentum. should be at least 0.0. Default: 0.0. weight_decay (float): Weight decay (L2 penalty). It should be in range [0.0, 1.0]. Default: 0.0. - nesterov (bool): Enables the Nesterov momentum. If use nesterov, momentum must greater then 0, - and dampening must equal to 1. Default: False. + nesterov (bool): Enables the Nesterov momentum. If nesterov is enabled, momentum must be positive, + and dampening must be equal to 0.0. Default: False. loss_scale (float): A floating point value for the loss scale. Should be not less than 1.0. Default: 1.0. 
Inputs: @@ -134,8 +134,8 @@ class SGD(Optimizer): if isinstance(momentum, float) and momentum < 0.0: raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum)) - if nesterov and (momentum <= 0 or dampening != 0): - raise ValueError("If use nesterov, momentum must be positive and dampening must equal to 0," + if nesterov and (momentum <= 0.0 or dampening != 0.0): + raise ValueError("If use nesterov, momentum must be positive and dampening must equal to 0.0," "but got momentum {}, dampening {}".format(momentum, dampening)) if isinstance(dampening, int):