diff --git a/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst b/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst
index ca2430f51c1..fd34f13418f 100644
--- a/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst
+++ b/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst
@@ -13,7 +13,7 @@ mindspore.nn.CrossEntropyLoss
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
 
@@ -22,7 +22,7 @@
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{'mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{'sum'.}
diff --git a/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst b/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst
index 391ddde3d45..71c6989fab1 100644
--- a/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst
+++ b/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst
@@ -11,7 +11,7 @@
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
diff --git a/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst b/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst
index 89706667287..a03317a17dd 100644
--- a/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst
+++ b/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst
@@ -13,7 +13,7 @@
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
 
@@ -22,7 +22,7 @@
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{'mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{'sum'.}
diff --git a/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst b/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst
index b14f5068b24..b6724dfe8b5 100644
--- a/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst
+++ b/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst
@@ -11,7 +11,7 @@
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
diff --git a/mindspore/python/mindspore/nn/loss/loss.py b/mindspore/python/mindspore/nn/loss/loss.py
index b4c3f4d1d43..219fa0af917 100644
--- a/mindspore/python/mindspore/nn/loss/loss.py
+++ b/mindspore/python/mindspore/nn/loss/loss.py
@@ -1602,7 +1602,7 @@ class NLLLoss(LossBase):
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
-        \quad w_{c}=\text { weight }[c] \cdot 1
+        \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}\{c \not= \text{ignore_index}\}
 
     where :math:`x` is the logits, :math:`t` is the labels, :math:`w` is the weight, N is the batch size,
     :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -1683,7 +1683,7 @@ class CrossEntropyLoss(LossBase):
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -1693,7 +1693,7 @@ class CrossEntropyLoss(LossBase):
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{`mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{`sum'.}
diff --git a/mindspore/python/mindspore/ops/function/nn_func.py b/mindspore/python/mindspore/ops/function/nn_func.py
index aea075f1123..cc3f5de037b 100644
--- a/mindspore/python/mindspore/ops/function/nn_func.py
+++ b/mindspore/python/mindspore/ops/function/nn_func.py
@@ -261,7 +261,7 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mea
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -271,7 +271,7 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mea
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{`mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{`sum'.}
@@ -363,7 +363,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=None, reduction='mean', l
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight, N is the batch size,
     :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -441,19 +441,16 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
         loss = loss.masked_fill(non_pad_mask, 0.)
         loss_weights = loss_weights.masked_fill(non_pad_mask, 0.)
         smooth_loss = smooth_loss.masked_fill(non_pad_mask, 0.)
-    else:
-        loss = loss.squeeze(target_dim)
-        smooth_loss = smooth_loss.squeeze(target_dim)
+
+    loss = loss.squeeze(target_dim)
+    smooth_loss = smooth_loss.squeeze(target_dim)
 
     if reduction == 'sum':
         loss = loss.sum()
         smooth_loss = smooth_loss.sum()
-    elif reduction == 'mean':
+    if reduction == 'mean':
         loss = loss.sum() / loss_weights.sum()
         smooth_loss = smooth_loss.mean()
-    else:
-        loss = loss.sum(target_dim)
-        smooth_loss = smooth_loss.sum(target_dim)
 
     eps_i = label_smoothing / inputs.shape[target_dim]
     loss = (1. - label_smoothing) * loss + eps_i * smooth_loss
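Note on the corrected docstring formulas: as a sanity check on the `mean` normalization they describe, the following is a minimal plain-NumPy sketch of the quantity the CrossEntropyLoss documentation defines, i.e. l_n = -w_{y_n} * log softmax(x_n)[y_n] * 1{y_n != ignore_index}, with `mean` dividing by the summed weights of the non-ignored targets. It is illustrative only, not MindSpore's implementation; the helper name `cross_entropy_reference` and the use of NumPy are assumptions made for the example.

    import numpy as np

    def cross_entropy_reference(logits, target, weight=None, ignore_index=-100, reduction='mean'):
        """Illustrative sketch of the docstring formula; not MindSpore's implementation."""
        n, num_classes = logits.shape
        if weight is None:
            weight = np.ones(num_classes)
        shifted = logits - logits.max(axis=1, keepdims=True)           # numerically stable log-softmax
        log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        keep = (target != ignore_index).astype(logits.dtype)           # 1{y_n != ignore_index}
        safe_target = np.where(target == ignore_index, 0, target)      # avoid indexing with ignore_index
        w = weight[safe_target] * keep                                  # w_{y_n} * indicator
        losses = -w * log_softmax[np.arange(n), safe_target]            # per-sample l_n
        if reduction == 'mean':
            return losses.sum() / w.sum()                               # normalize by kept weights, as in the corrected denominator
        if reduction == 'sum':
            return losses.sum()
        return losses                                                   # reduction == 'none'

With no ignored targets this reduces to a plain weighted average of the per-sample losses, which is exactly what the corrected denominator sum_n w_{y_n} * 1{y_n != ignore_index} expresses.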
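The final hunk in nn_func.py also changes the `_nll_loss` control flow: the squeeze of `target_dim` is now unconditional, and a `reduction` other than `'sum'` or `'mean'` (i.e. `'none'`) falls through without the extra `sum(target_dim)` the removed `else` branch performed. Below is a condensed, hypothetical NumPy mock-up of the post-patch reduction flow; the names mirror the diff, the gather and masking steps above this point are elided, and `num_classes` stands in for `inputs.shape[target_dim]`.

    import numpy as np

    def reduce_like_nll_loss(loss, smooth_loss, loss_weights, label_smoothing, num_classes,
                             reduction='mean', target_dim=-1):
        """Mock-up of the reduction/label-smoothing flow after this patch (not the real function)."""
        loss = np.squeeze(loss, axis=target_dim)                 # squeeze is now unconditional
        smooth_loss = np.squeeze(smooth_loss, axis=target_dim)

        if reduction == 'sum':
            loss = loss.sum()
            smooth_loss = smooth_loss.sum()
        if reduction == 'mean':
            loss = loss.sum() / loss_weights.sum()               # weighted mean over non-ignored targets
            smooth_loss = smooth_loss.mean()
        # reduction == 'none' falls through: per-element loss, no extra sum over target_dim

        eps_i = label_smoothing / num_classes
        return (1. - label_smoothing) * loss + eps_i * smooth_loss

In this shape, `'sum'` and `'mean'` return scalars while `'none'` keeps the per-element losses after the label-smoothing mix, as the hunk reads.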