forked from mindspore-Ecosystem/mindspore
!35471 fix nllloss and crossentropy bug and description typo error
Merge pull request !35471 from 吕昱峰(Nate.River)/nllloss
This commit is contained in:
commit 62653a9697

Changed paths:
  docs/api/api_python/nn
  docs/api/api_python/ops
  mindspore/python/mindspore
docs/api/api_python/nn (mindspore.nn.CrossEntropyLoss):

@@ -13,7 +13,7 @@ mindspore.nn.CrossEntropyLoss
 
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the predicted value, :math:`t` is the target value, :math:`w` is the weight, N is the batch size, and :math:`c`, restricted to [0, C-1], is the class index, where :math:`C` is the number of classes.
 
@@ -22,7 +22,7 @@ mindspore.nn.CrossEntropyLoss
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{'mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{'sum'.}
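As a cross-check of the formula above, a minimal NumPy sketch of the per-sample term l_n; the helper and the names logits, target, class_weight are illustrative, not part of this change:

import numpy as np

def cross_entropy_terms(logits, target, class_weight, ignore_index=-100):
    """l_n = -w_{y_n} * log(softmax(x_n)[y_n]) * 1{y_n != ignore_index}."""
    shifted = logits - logits.max(axis=1, keepdims=True)       # stable log-softmax
    log_prob = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    n = np.arange(target.shape[0])
    mask = (target != ignore_index)                            # 1{y_n != ignore_index}
    safe_t = np.where(mask, target, 0)                         # avoid indexing with ignore_index
    return -class_weight[safe_t] * log_prob[n, safe_t] * mask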
docs/api/api_python/nn (mindspore.nn.NLLLoss):

@@ -11,7 +11,7 @@ mindspore.nn.NLLLoss
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     where :math:`x` is the predicted value, :math:`t` is the target value, :math:`w` is the weight, N is the batch size, and :math:`c`, restricted to [0, C-1], is the class index, where :math:`C` is the number of classes.
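NLLLoss applies the same class weighting to inputs that are already log-probabilities; a sketch under the same illustrative names:

import numpy as np

def nll_terms(log_probs, target, class_weight, ignore_index=-100):
    """l_n = -w_{t_n} * x_{n, t_n}, with w_c = weight[c] * 1{c != ignore_index}."""
    n = np.arange(target.shape[0])
    mask = (target != ignore_index)
    safe_t = np.where(mask, target, 0)
    return -class_weight[safe_t] * mask * log_probs[n, safe_t]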
docs/api/api_python/ops:

@@ -13,7 +13,7 @@
 
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the predicted value, :math:`t` is the target value, :math:`w` is the weight, N is the batch size, and :math:`c`, restricted to [0, C-1], is the class index, where :math:`C` is the number of classes.
 
@@ -22,7 +22,7 @@
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{'mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{'sum'.}
@@ -11,7 +11,7 @@
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     where :math:`x` is the predicted value, :math:`t` is the target value, :math:`w` is the weight, N is the batch size, and :math:`c`, restricted to [0, C-1], is the class index, where :math:`C` is the number of classes.
mindspore/python/mindspore:

@@ -1602,7 +1602,7 @@ class NLLLoss(LossBase):
 
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
-        \quad w_{c}=\text { weight }[c] \cdot 1
+        \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}\{c \not= \text{ignore_index}\}
 
     where :math:`x` is the logits, :math:`t` is the labels, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
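A quick smoke test of the class whose docstring is fixed above; shapes and values are illustrative, and the call pattern assumes this branch's nn.NLLLoss interface (log-probability logits of shape (N, C), integer labels of shape (N,)):

import numpy as np
import mindspore as ms
import mindspore.nn as nn

# Two samples, three classes; inputs to NLLLoss are log-probabilities.
logits = ms.Tensor(np.log(np.array([[0.7, 0.2, 0.1],
                                    [0.1, 0.8, 0.1]], np.float32)))
labels = ms.Tensor(np.array([0, 1], np.int32))
loss_fn = nn.NLLLoss(reduction='mean')
print(loss_fn(logits, labels))  # mean of (-log 0.7, -log 0.8), about 0.29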
@@ -1683,7 +1683,7 @@ class CrossEntropyLoss(LossBase):
 
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
 
@@ -1693,7 +1693,7 @@ class CrossEntropyLoss(LossBase):
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{`mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{`sum'.}
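Similarly for the class above, assuming the documented interface (unnormalized logits, class-index target):

import numpy as np
import mindspore as ms
import mindspore.nn as nn

logits = ms.Tensor(np.random.randn(3, 5).astype(np.float32))  # (N=3, C=5) raw scores
target = ms.Tensor(np.array([1, 0, 4], np.int32))
loss_fn = nn.CrossEntropyLoss(reduction='mean')
print(loss_fn(logits, target))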
@@ -261,7 +261,7 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mea
 
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
 
@@ -271,7 +271,7 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mea
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{`mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{`sum'.}
@@ -363,7 +363,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=None, reduction='mean', l
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
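The functional counterparts carry the same formulas; a hedged usage sketch, assuming cross_entropy and nll_loss are exported under mindspore.ops with the signatures shown in the hunk headers above:

import numpy as np
import mindspore as ms
from mindspore import ops

inputs = ms.Tensor(np.random.randn(3, 5).astype(np.float32))
target = ms.Tensor(np.array([1, 0, 4], np.int32))
ce = ops.cross_entropy(inputs, target, reduction='mean')
log_probs = ops.LogSoftmax(axis=1)(inputs)                # nll_loss expects log-probabilities
nll = ops.nll_loss(log_probs, target, reduction='mean')   # should match ce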
@@ -441,19 +441,16 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
             loss = loss.masked_fill(non_pad_mask, 0.)
             loss_weights = loss_weights.masked_fill(non_pad_mask, 0.)
             smooth_loss = smooth_loss.masked_fill(non_pad_mask, 0.)
-        else:
-            loss = loss.squeeze(target_dim)
-            smooth_loss = smooth_loss.squeeze(target_dim)
+
+        loss = loss.squeeze(target_dim)
+        smooth_loss = smooth_loss.squeeze(target_dim)
 
         if reduction == 'sum':
             loss = loss.sum()
             smooth_loss = smooth_loss.sum()
-        elif reduction == 'mean':
+        if reduction == 'mean':
             loss = loss.sum() / loss_weights.sum()
             smooth_loss = smooth_loss.mean()
-        else:
-            loss = loss.sum(target_dim)
-            smooth_loss = smooth_loss.sum(target_dim)
 
         eps_i = label_smoothing / inputs.shape[target_dim]
         loss = (1. - label_smoothing) * loss + eps_i * smooth_loss
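The change above makes the squeeze over target_dim unconditional (previously it only ran when ignore_index was None), so the trailing branch that summed over target_dim for reduction='none' is no longer needed; 'mean' continues to divide by the summed per-sample weights rather than the element count. A NumPy reference for the intended reduction semantics (illustrative, not the MindSpore implementation):

import numpy as np

def nll_reference(log_probs, target, class_weight, reduction='mean'):
    """Weighted NLL with the reductions used above."""
    n = np.arange(target.shape[0])
    w = class_weight[target]                    # plays the role of loss_weights in the diff
    loss = -w * log_probs[n, target]
    if reduction == 'sum':
        return loss.sum()
    if reduction == 'mean':
        return loss.sum() / w.sum()             # divide by total weight, not N
    return loss                                 # reduction='none'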