diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index f088e087a7a..cb8833269cd 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -41,7 +41,7 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, grad
         beta2 (Tensor): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0).
         eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0.
         lr (Tensor): Learning rate.
-        weight_decay_tensor (Tensor): Weight decay. Should be equal to or greater than 0.
+        weight_decay_tensor (Tensor): Weight decay. Should be in range [0.0, 1.0].
         param (Tensor): Parameters.
         m (Tensor): m value of parameters.
         v (Tensor): v value of parameters.
@@ -252,8 +252,8 @@ class Adam(Optimizer):
         use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
             If True, updates the gradients using NAG.
             If False, updates the gradients without using NAG. Default: False.
-        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
-        loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be in range [0.0, 1.0]. Default: 0.0.
+        loss_scale (float): A floating point value for the loss scale. Should be at least 1.0. Default: 1.0.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
@@ -392,7 +392,7 @@ class AdamWeightDecay(Optimizer):
             Should be in range (0.0, 1.0).
         eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
             Should be greater than 0.
-        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be in range [0.0, 1.0]. Default: 0.0.
         decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
                                  lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
 
@@ -457,7 +457,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
             Should be in range (0.0, 1.0).
         eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
             Should be greater than 0.
-        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be in range [0.0, 1.0]. Default: 0.0.
         decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
                                  lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
 
diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py
index c9f12dc6d48..e2e20507634 100644
--- a/mindspore/nn/optim/ftrl.py
+++ b/mindspore/nn/optim/ftrl.py
@@ -128,7 +128,7 @@ class FTRL(Optimizer):
         l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0.
         use_locking (bool): If True use locks for update operation. Default: False.
         loss_scale (float): Value for the loss scale. It should be equal to or greater than 1.0. Default: 1.0.
-        wegith_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0.
+        weight_decay (float): Weight decay value to multiply weight. Should be in range [0.0, 1.0]. Default: 0.0.
 
     Inputs:
         - **grads** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is as same as the `params`
diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py
index 93c7edbce84..143917336d3 100755
--- a/mindspore/nn/optim/lamb.py
+++ b/mindspore/nn/optim/lamb.py
@@ -44,7 +44,7 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para
         beta2 (Tensor): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0).
         eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0.
         lr (Tensor): Learning rate.
-        weight_decay_tensor (Tensor): Weight decay. Should be equal to or greater than 0.
+        weight_decay_tensor (Tensor): Weight decay. Should be in range [0.0, 1.0].
         global_step (Tensor): Global step.
         param (Tensor): Parameters.
         m (Tensor): m value of parameters.
@@ -128,7 +128,7 @@ def _update_run_op_graph_kernel(beta1, beta2, eps, lr, weight_decay_tensor,
         beta2 (Tensor): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0).
         eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0.
         lr (Tensor): Learning rate.
-        weight_decay_tensor (Tensor): Weight decay. Should be equal to or greater than 0.
+        weight_decay_tensor (Tensor): Weight decay. Should be in range [0.0, 1.0].
         global_step (Tensor): Global step.
         param (Tensor): Parameters.
         m (Tensor): m value of parameters.
@@ -229,7 +229,7 @@ class Lamb(Optimizer):
             Should be in range (0.0, 1.0).
         eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
             Should be greater than 0.
-        weight_decay (float): Weight decay (L2 penalty). Default: 0.0. Should be equal to or greater than 0.
+        weight_decay (float): Weight decay (L2 penalty). Default: 0.0. Should be in range [0.0, 1.0].
         decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
             lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
 
diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py
index 5bd62cd092f..d784c88e448 100644
--- a/mindspore/nn/optim/lazyadam.py
+++ b/mindspore/nn/optim/lazyadam.py
@@ -133,7 +133,7 @@ class LazyAdam(Optimizer):
             If True, updates the gradients using NAG.
             If False, updates the gradients without using NAG. Default: False.
         weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
-        loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. Default:
+        loss_scale (float): A floating point value for the loss scale. It should be at least 1.0. Default:
                             1.0.
 
     Inputs:
diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py
index addb71a652f..3beafa0775f 100755
--- a/mindspore/nn/optim/momentum.py
+++ b/mindspore/nn/optim/momentum.py
@@ -83,8 +83,8 @@ class Momentum(Optimizer):
                                                              or greater than 0.0.
         momentum (float): Hyperparameter of type float, means momentum for the moving average.
             It should be at least 0.0.
-        weight_decay (int, float): Weight decay (L2 penalty). It should be equal to or greater than 0.0. Default: 0.0.
-        loss_scale (int, float): A floating point value for the loss scale. It should be greater than 0.0. Default: 1.0.
+        weight_decay (int, float): Weight decay (L2 penalty). It should be in range [0.0, 1.0]. Default: 0.0.
+        loss_scale (int, float): A floating point value for the loss scale. Should be at least 1.0. Default: 1.0.
         use_nesterov (bool): Enable Nesterov momentum. Default: False.
 
     Inputs:
diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py
index 37a4aee6998..54d4f44f871 100755
--- a/mindspore/nn/optim/optimizer.py
+++ b/mindspore/nn/optim/optimizer.py
@@ -79,10 +79,9 @@ class Optimizer(Cell):
               the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which
               in the value of 'order_params' should be in one of group parameters.
 
-        weight_decay (float): A floating point value for the weight decay. It should be not less than 0 and not
-                              greater than 1.
+        weight_decay (float): A floating point value for the weight decay. It should be in range [0.0, 1.0].
             If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0.
-        loss_scale (float): A floating point value for the loss scale. It should be not less than 1. If the
+        loss_scale (float): A floating point value for the loss scale. It should be at least 1.0. If the
             type of `loss_scale` input is int, it will be converted to float. Default: 1.0.
 
     Raises:
@@ -333,8 +332,8 @@ class Optimizer(Cell):
 
             if 'weight_decay' in group_param.keys():
                 validator.check_float_legal_value('weight_decay', group_param['weight_decay'], None)
-                validator.check_number_range('weight_decay', group_param['weight_decay'], 0.0, float("inf"),
-                                             Rel.INC_LEFT, self.cls_name)
+                validator.check_number_range('weight_decay', group_param['weight_decay'], 0.0, 1.0,
+                                             Rel.INC_BOTH, self.cls_name)
                 weight_decay_ = group_param['weight_decay'] * self.loss_scale
             else:
                 weight_decay_ = weight_decay * self.loss_scale
diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py
index 948868322ec..daa41d1ae8a 100644
--- a/mindspore/nn/optim/proximal_ada_grad.py
+++ b/mindspore/nn/optim/proximal_ada_grad.py
@@ -71,8 +71,8 @@ class ProximalAdagrad(Optimizer):
         l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0.
         l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0.
         use_locking (bool): If True use locks for update operation. Default: False.
-        loss_scale (float): Value for the loss scale. It should be greater than 0.0. Default: 1.0.
-        wegith_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0.
+        loss_scale (float): Value for the loss scale. It should be at least 1.0. Default: 1.0.
+        weight_decay (float): Weight decay value to multiply weight. Should be in range [0.0, 1.0]. Default: 0.0.
 
     Inputs:
         - **grads** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is as same as the `params`
diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py
index 71fecc43a7c..75d4d6b0aca 100644
--- a/mindspore/nn/optim/rmsprop.py
+++ b/mindspore/nn/optim/rmsprop.py
@@ -123,8 +123,8 @@ class RMSProp(Optimizer):
                          0. Default: 1e-10.
         use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False.
         centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False.
-        loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0.
-        weight_decay (float): Weight decay (L2 penalty). Should be equal to or greater than 0. Default: 0.0.
+        loss_scale (float): A floating point value for the loss scale. Should be at least 1.0. Default: 1.0.
+        weight_decay (float): Weight decay (L2 penalty). Should be in range [0.0, 1.0]. Default: 0.0.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index 43f001ea248..410539d9304 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -76,10 +76,9 @@ class SGD(Optimizer):
                                                         greater than 0. Default: 0.1.
         momentum (float): A floating point value the momentum. should be at least 0.0. Default: 0.0.
         dampening (float): A floating point value of dampening for momentum. should be at least 0.0. Default: 0.0.
-        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be in range [0.0, 1.0]. Default: 0.0.
         nesterov (bool): Enables the Nesterov momentum. Default: False.
-        loss_scale (float): A floating point value for the loss scale, which should be larger
-                            than 0.0. Default: 1.0.
+        loss_scale (float): A floating point value for the loss scale. Should be at least 1.0. Default: 1.0.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.