diff --git a/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst b/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst
index ca2430f51c1..fd34f13418f 100644
--- a/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst
+++ b/docs/api/api_python/nn/mindspore.nn.CrossEntropyLoss.rst
@@ -13,7 +13,7 @@ mindspore.nn.CrossEntropyLoss
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
 
@@ -22,7 +22,7 @@
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{'mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{'sum'.}
diff --git a/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst b/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst
index 391ddde3d45..71c6989fab1 100644
--- a/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst
+++ b/docs/api/api_python/nn/mindspore.nn.NLLLoss.rst
@@ -11,7 +11,7 @@
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
diff --git a/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst b/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst
index 89706667287..a03317a17dd 100644
--- a/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst
+++ b/docs/api/api_python/ops/mindspore.ops.func_cross_entropy.rst
@@ -13,7 +13,7 @@
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
 
@@ -22,7 +22,7 @@
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{'mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{'sum'.}
diff --git a/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst b/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst
index b14f5068b24..b6724dfe8b5 100644
--- a/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst
+++ b/docs/api/api_python/ops/mindspore.ops.func_nll_loss.rst
@@ -11,7 +11,7 @@
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     其中, :math:`x` 表示预测值, :math:`t` 表示目标值, :math:`w` 表示权重,N表示batch size, :math:`c` 限定范围为[0, C-1],表示类索引,其中 :math:`C` 表示类的数量。
diff --git a/mindspore/python/mindspore/nn/loss/loss.py b/mindspore/python/mindspore/nn/loss/loss.py
index b4c3f4d1d43..219fa0af917 100644
--- a/mindspore/python/mindspore/nn/loss/loss.py
+++ b/mindspore/python/mindspore/nn/loss/loss.py
@@ -1602,7 +1602,7 @@ class NLLLoss(LossBase):
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
-        \quad w_{c}=\text { weight }[c] \cdot 1
+        \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}\{c \not= \text{ignore_index}\}
 
     where :math:`x` is the logits, :math:`t` is the labels, :math:`w` is the weight, N is the batch size,
     :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -1683,7 +1683,7 @@ class CrossEntropyLoss(LossBase):
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -1693,7 +1693,7 @@ class CrossEntropyLoss(LossBase):
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{`mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{`sum'.}
diff --git a/mindspore/python/mindspore/ops/function/nn_func.py b/mindspore/python/mindspore/ops/function/nn_func.py
index aea075f1123..cc3f5de037b 100644
--- a/mindspore/python/mindspore/ops/function/nn_func.py
+++ b/mindspore/python/mindspore/ops/function/nn_func.py
@@ -261,7 +261,7 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mea
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
-        \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
+        \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -271,7 +271,7 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mea
     .. math::
 
         \ell(x, y) = \begin{cases}
-            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}} l_n, &
+            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
             \text{if reduction} = \text{`mean';}\\
             \sum_{n=1}^N l_n, &
             \text{if reduction} = \text{`sum'.}
@@ -363,7 +363,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=None, reduction='mean', l
         \ell(x, t)=L=\left\{l_{1}, \ldots, l_{N}\right\}^{\top},
         \quad l_{n}=-w_{t_{n}} x_{n, t_{n}},
         \quad w_{c}=\text { weight }[c] \cdot \mathbb{1}
-        \{c \not= \text{ignore\_index}\},
+        \{c \not= \text{ignore_index}\},
 
     where :math:`x` is the inputs, :math:`t` is the target, :math:`w` is the weight, N is the batch size,
     :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
@@ -441,19 +441,16 @@ def _nll_loss(inputs, target, target_dim=-1, weight=None, ignore_index=None, red
         loss = loss.masked_fill(non_pad_mask, 0.)
         loss_weights = loss_weights.masked_fill(non_pad_mask, 0.)
         smooth_loss = smooth_loss.masked_fill(non_pad_mask, 0.)
-    else:
-        loss = loss.squeeze(target_dim)
-        smooth_loss = smooth_loss.squeeze(target_dim)
+
+    loss = loss.squeeze(target_dim)
+    smooth_loss = smooth_loss.squeeze(target_dim)
 
     if reduction == 'sum':
         loss = loss.sum()
         smooth_loss = smooth_loss.sum()
-    elif reduction == 'mean':
+    if reduction == 'mean':
         loss = loss.sum() / loss_weights.sum()
         smooth_loss = smooth_loss.mean()
-    else:
-        loss = loss.sum(target_dim)
-        smooth_loss = smooth_loss.sum(target_dim)
 
     eps_i = label_smoothing / inputs.shape[target_dim]
     loss = (1. - label_smoothing) * loss + eps_i * smooth_loss
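Note on the corrected docstring formulas: as a sanity check on the `mean` normalization they describe, the following is a minimal plain-NumPy sketch of the quantity the CrossEntropyLoss documentation defines, i.e. l_n = -w_{y_n} * log softmax(x_n)[y_n] * 1{y_n != ignore_index}, with `mean` dividing by the summed weights of the non-ignored targets. It is illustrative only, not MindSpore's implementation; the helper name `cross_entropy_reference` and the use of NumPy are assumptions made for the example.

    import numpy as np

    def cross_entropy_reference(logits, target, weight=None, ignore_index=-100, reduction='mean'):
        """Illustrative sketch of the docstring formula; not MindSpore's implementation."""
        n, num_classes = logits.shape
        if weight is None:
            weight = np.ones(num_classes)
        shifted = logits - logits.max(axis=1, keepdims=True)           # numerically stable log-softmax
        log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        keep = (target != ignore_index).astype(logits.dtype)           # 1{y_n != ignore_index}
        safe_target = np.where(target == ignore_index, 0, target)      # avoid indexing with ignore_index
        w = weight[safe_target] * keep                                  # w_{y_n} * indicator
        losses = -w * log_softmax[np.arange(n), safe_target]            # per-sample l_n
        if reduction == 'mean':
            return losses.sum() / w.sum()                               # normalize by kept weights, as in the corrected denominator
        if reduction == 'sum':
            return losses.sum()
        return losses                                                   # reduction == 'none'

With no ignored targets this reduces to a plain weighted average of the per-sample losses, which is exactly what the corrected denominator sum_n w_{y_n} * 1{y_n != ignore_index} expresses.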
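The final hunk in nn_func.py also changes the `_nll_loss` control flow: the squeeze of `target_dim` is now unconditional, and a `reduction` other than `'sum'` or `'mean'` (i.e. `'none'`) falls through without the extra `sum(target_dim)` the removed `else` branch performed. Below is a condensed, hypothetical NumPy mock-up of the post-patch reduction flow; the names mirror the diff, the gather and masking steps above this point are elided, and `num_classes` stands in for `inputs.shape[target_dim]`.

    import numpy as np

    def reduce_like_nll_loss(loss, smooth_loss, loss_weights, label_smoothing, num_classes,
                             reduction='mean', target_dim=-1):
        """Mock-up of the reduction/label-smoothing flow after this patch (not the real function)."""
        loss = np.squeeze(loss, axis=target_dim)                 # squeeze is now unconditional
        smooth_loss = np.squeeze(smooth_loss, axis=target_dim)

        if reduction == 'sum':
            loss = loss.sum()
            smooth_loss = smooth_loss.sum()
        if reduction == 'mean':
            loss = loss.sum() / loss_weights.sum()               # weighted mean over non-ignored targets
            smooth_loss = smooth_loss.mean()
        # reduction == 'none' falls through: per-element loss, no extra sum over target_dim

        eps_i = label_smoothing / num_classes
        return (1. - label_smoothing) * loss + eps_i * smooth_loss

In this shape, `'sum'` and `'mean'` return scalars while `'none'` keeps the per-element losses after the label-smoothing mix, as the hunk reads.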