!37263 mv the interface is_dynamic_lr_or_weight_decay into inner
Merge pull request !37263 from zhangbuxue/mv_the_interface_is_dynamic_lr_or_weight_decay_into_inner

commit ad86e9cf0a
@@ -30,8 +30,8 @@ class MIND_API BatchNormGradGrad : public BaseOperator {
  public:
   MIND_API_BASE_MEMBER(BatchNormGradGrad);
   BatchNormGradGrad() : BaseOperator(kNameBatchNormGradGrad) {
-    InitIOName({"dy", "x", "scale", "mean", "variance", "dout_dx", "dout_dscale", "dout_dbias"},
-               {"ddy", "dx", "dscale"});
+    InitIOName({"x", "dy", "scale", "mean", "variance", "dout_dx", "dout_dscale", "dout_dbias"},
+               {"dx", "ddy", "dscale"});
   }
   void Init(bool is_training = false, float epsilon = 1e-05, const std::string &format = "NCHW");
   void set_is_training(bool is_training);
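Only the first two input names and the first two output names change in the hunk above; the rest are identical. Below is a minimal, hypothetical Python illustration (the helper is invented, not MindSpore API) of what the positional reorder means for a caller written against the old order:

# The two tuples are copied from the hunk; the helper is an invented example.
OLD_INPUTS = ("dy", "x", "scale", "mean", "variance", "dout_dx", "dout_dscale", "dout_dbias")
NEW_INPUTS = ("x", "dy", "scale", "mean", "variance", "dout_dx", "dout_dscale", "dout_dbias")

def remap_positional_args(args):
    """Adapt a positional call written for the old order: (dy, x, ...) -> (x, dy, ...)."""
    dy, x, *rest = args
    return (x, dy, *rest)

assert remap_positional_args(OLD_INPUTS) == NEW_INPUTS
# Outputs are reordered the same way: ("ddy", "dx", "dscale") becomes ("dx", "ddy", "dscale").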
@@ -181,7 +181,7 @@ class ASGD(Optimizer):
         gradients = self.gradients_centralization(gradients)
         gradients = self.scale_grad(gradients)
         lrs = self.get_lr()
-        if not self.is_dynamic_lr_or_weight_decay():
+        if not self._is_dynamic_lr_or_weight_decay():
             self.assignadd(self.global_step, self.global_step_increase_tensor)
         success = True
         params = self._parameters
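The same renamed check appears in ASGD, Lamb, and Rprop and encodes one convention: when both the learning rate and the weight decay are static, construct() advances global_step itself; when either is dynamic, the advance happens inside get_lr() (see the Optimizer hunk below), so the counter moves exactly once per step in both cases. A minimal sketch of that convention, with a plain boolean standing in for the renamed method:

# Minimal sketch of the step-increment convention in this PR, not MindSpore code.
class ToyOptimizer:
    def __init__(self, is_dynamic):
        self.is_dynamic = is_dynamic   # stands in for _is_dynamic_lr_or_weight_decay()
        self.global_step = 0

    def get_lr(self):
        if self.is_dynamic:
            self.global_step += 1      # dynamic schedule: advance the step here
        return 0.01

    def construct(self, gradients):
        lr = self.get_lr()
        if not self.is_dynamic:
            self.global_step += 1      # static lr/weight decay: advance it here instead
        return lr

# Either way the counter moves exactly once per construct() call.
opt = ToyOptimizer(is_dynamic=False)
opt.construct(gradients=None)
assert opt.global_step == 1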
@@ -260,7 +260,7 @@ class Lamb(Optimizer):
     def construct(self, gradients):
         weight_decay = self.get_weight_decay()
         lr = self.get_lr()
-        if not self.is_dynamic_lr_or_weight_decay():
+        if not self._is_dynamic_lr_or_weight_decay():
             self.assignadd(self.global_step, self.global_step_increase_tensor)
         lamb_opt = _lamb_opt
         gradients = self.flatten_gradients(gradients)
@@ -195,6 +195,6 @@ class LARS(Optimizer):
         gradients = self.hyper_map(F.partial(_lars_opt, self.lars, self.loss_scale, lr, weight_decay),
                                    gradients, params, self.decay_flags, self.lars_flag)
         success = self.opt(gradients)
-        if self.is_dynamic_lr_or_weight_decay() and not self.opt.is_dynamic_lr_or_weight_decay():
+        if self._is_dynamic_lr_or_weight_decay() and not self.opt.dynamic_lr and not self.opt.dynamic_weight_decay:
             self.assignadd(self.global_step, self.global_step_increase_tensor)
         return success
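In LARS the second half of the condition used to call the method on the wrapped optimizer (not self.opt.is_dynamic_lr_or_weight_decay()); with the method now underscore-prefixed, the call site reads the wrapped optimizer's dynamic_lr and dynamic_weight_decay flags directly. Assuming the inner method simply ORs those two flags (an inference from this PR, not shown in the hunk), the rewritten condition is equivalent by De Morgan's law; a quick check:

# Exhaustive check of the assumed equivalence between the old and new second
# conjunct of the LARS condition (the OR body of the inner method is inferred,
# not shown in this diff).
for dynamic_lr in (False, True):
    for dynamic_weight_decay in (False, True):
        old_form = not (dynamic_lr or dynamic_weight_decay)          # not opt._is_dynamic_lr_or_weight_decay()
        new_form = (not dynamic_lr) and (not dynamic_weight_decay)   # new inline flag checks
        assert old_form == new_form  # De Morgan's law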
@@ -695,7 +695,7 @@ class Optimizer(Cell):
                     lr += (current_dynamic_lr,)
             else:
                 lr = self.learning_rate(self.global_step).reshape(())
-        if self.is_dynamic_lr_or_weight_decay():
+        if self._is_dynamic_lr_or_weight_decay():
             self.assignadd(self.global_step, self.global_step_increase_tensor)
         return lr

@@ -754,7 +754,7 @@ class Optimizer(Cell):

         return lr if isinstance(param, list) else lr[0]

-    def is_dynamic_lr_or_weight_decay(self):
+    def _is_dynamic_lr_or_weight_decay(self):
         """
         Determine whether the learning rate or weight decay is dynamic.

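Only the signature and the first docstring line of the renamed method appear in this diff. Given that docstring and the dynamic_lr / dynamic_weight_decay attributes the LARS hunk now reads, a plausible body is a simple OR of the two flags; the sketch below is an inference, not the actual source. The leading underscore marks the method as internal ("inner", per the PR title), which is why LARS stopped calling it on the wrapped optimizer.

# Plausible reconstruction of the renamed helper; only its signature and the
# first docstring line are visible in this diff, the body is inferred.
def _is_dynamic_lr_or_weight_decay(self):
    """Determine whether the learning rate or weight decay is dynamic."""
    return self.dynamic_lr or self.dynamic_weight_decay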
@@ -196,7 +196,7 @@ class Rprop(Optimizer):
         gradients = self.gradients_centralization(gradients)
         gradients = self.scale_grad(gradients)
         lrs = self.get_lr()
-        if not self.is_dynamic_lr_or_weight_decay():
+        if not self._is_dynamic_lr_or_weight_decay():
             self.assignadd(self.global_step, self.global_step_increase_tensor)
         success = True
