From 0ec70068ae9ad50f8b6d488e38c54af7af432a8e Mon Sep 17 00:00:00 2001
From: wanyiming
Date: Sat, 29 Aug 2020 15:50:37 +0800
Subject: [PATCH] mod_SoftmaxCrossEntropyWithLogits

---
 mindspore/nn/loss/loss.py | 23 +++--------
 .../toolbox/uncertainty_evaluation.py | 4 +-
 model_zoo/official/cv/alexnet/eval.py | 2 +-
 model_zoo/official/cv/alexnet/train.py | 2 +-
 model_zoo/official/cv/googlenet/eval.py | 2 +-
 model_zoo/official/cv/googlenet/train.py | 2 +-
 model_zoo/official/cv/lenet/eval.py | 2 +-
 model_zoo/official/cv/lenet/train.py | 2 +-
 .../official/cv/lenet_quant/eval_quant.py | 2 +-
 .../official/cv/lenet_quant/train_quant.py | 2 +-
 model_zoo/official/cv/mobilenetv2/eval.py | 3 +-
 model_zoo/official/cv/mobilenetv2/train.py | 5 +--
 .../official/cv/mobilenetv2_quant/eval.py | 2 +-
 .../official/cv/mobilenetv2_quant/train.py | 4 +-
 model_zoo/official/cv/mobilenetv3/eval.py | 3 +-
 model_zoo/official/cv/mobilenetv3/train.py | 3 +-
 model_zoo/official/cv/resnet/eval.py | 5 ++-
 .../cv/resnet/src/CrossEntropySmooth.py | 38 +++++++++++++++++++
 model_zoo/official/cv/resnet/train.py | 12 +++---
 model_zoo/official/cv/vgg16/eval.py | 2 +-
 model_zoo/official/cv/vgg16/train.py | 2 +-
 model_zoo/official/nlp/lstm/eval.py | 2 +-
 model_zoo/official/nlp/lstm/train.py | 2 +-
 tests/st/fusion/test_conv_bn1_fusion.py | 2 +-
 .../st/host_device/test_host_device_lenet.py | 2 +-
 tests/st/nccl/test_nccl_lenet.py | 2 +-
 .../models/resnet50/src/CrossEntropySmooth.py | 38 +++++++++++++++++++
 .../models/resnet50/test_resnet50_imagenet.py | 11 +++---
 tests/st/networks/test_cpu_lenet.py | 2 +-
 tests/st/networks/test_gpu_alexnet.py | 2 +-
 tests/st/networks/test_gpu_lenet.py | 4 +-
 tests/st/networks/test_gpu_lstm.py | 2 +-
 tests/st/networks/test_gpu_resnet.py | 6 +--
 tests/st/networks/test_network_main.py | 2 +-
 tests/st/ops/cpu/test_momentum_op.py | 2 +-
 tests/st/ops/gpu/test_adam_op.py | 2 +-
 tests/st/ops/gpu/test_ftrl_op.py | 2 +-
 tests/st/ops/gpu/test_momentum_op.py | 2 +-
 tests/st/ops/gpu/test_sgd_op.py | 2 +-
 ...se_softmax_cross_entropy_with_logits_op.py | 23 +++--------
 tests/st/probability/test_bnn_layer.py | 2 +-
 .../probability/test_transform_bnn_layer.py | 2 +-
 .../probability/test_transform_bnn_model.py | 2 +-
 .../test_cmp_sparse_embedding.py | 4 +-
 tests/st/ps/full_ps/test_full_ps_lenet.py | 2 +-
 .../st/ps/multi_full_ps/test_multi_full_ps.py | 4 +-
 tests/st/pynative/test_pynative_hook.py | 2 +-
 tests/st/pynative/test_pynative_mindarmour.py | 4 +-
 .../lenet_quant/test_lenet_quant.py | 6 +--
 tests/st/summary/test_summary.py | 2 +-
 tests/ut/python/exec/test_train.py | 2 +-
 tests/ut/python/exec/test_train_with_lars.py | 2 +-
 .../python/parallel/test_allreduce_fusion.py | 2 +-
 tests/ut/python/parallel/test_alltoall.py | 2 +-
 .../parallel/test_batchnorm_batch_parallel.py | 2 +-
 .../ut/python/parallel/test_bn_prelu_cell.py | 2 +-
 .../python/parallel/test_dataset_interface.py | 2 +-
 tests/ut/python/parallel/test_full_batch.py | 2 +-
 tests/ut/python/parallel/test_one_dev.py | 2 +-
 .../parallel/test_operator_model_parallel.py | 4 +-
 tests/ut/python/parallel/test_prelu_cell.py | 2 +-
 tests/ut/python/parallel/test_reshape.py | 2 +-
 tests/ut/python/parallel/test_transpose.py | 2 +-
 tests/ut/python/pynative_mode/test_hook.py | 2 +-
 .../pynative_mode/test_pynative_model.py | 2 +-
 tests/ut/python/utils/test_serialize.py | 2 +-
 66 files changed, 170 insertions(+), 126 deletions(-)
 create mode 100644 model_zoo/official/cv/resnet/src/CrossEntropySmooth.py
 create mode 100644 tests/st/networks/models/resnet50/src/CrossEntropySmooth.py
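Note: this patch removes the is_grad, smooth_factor and num_classes arguments
from nn.SoftmaxCrossEntropyWithLogits; the gradient variant is selected
internally, and label smoothing moves into a dedicated CrossEntropySmooth
cell. A minimal before/after sketch of the call-site migration applied
throughout model_zoo and tests:

    # before: caller picked the grad variant and smoothing in the loss
    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")

    # after: only sparse/reduction remain
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")

On CPU/GPU the fused sparse kernel is now taken only when reduction == 'mean';
other reductions fall through to the one-hot + softmax cross-entropy path.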
diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py
index 5e0a61e4de8..645e371a977 100644
--- a/mindspore/nn/loss/loss.py
+++ b/mindspore/nn/loss/loss.py
@@ -213,13 +213,9 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         of entry is a valid one.
 
     Args:
-        is_grad (bool): Specifies whether calculate grad only. Default: True.
         sparse (bool): Specifies whether labels use sparse format or not. Default: False.
         reduction (str): Type of reduction to be applied to loss. The optional values are "mean",
             "sum", and "none". If "none", do not perform reduction. Default: "none".
-        smooth_factor (float): Label smoothing factor. It is a optional input which should be in range [0, 1].
-            Default: 0.
-        num_classes (int): The number of classes in the task. It is a optional input Default: 2.
 
     Inputs:
         - **logits** (Tensor) - Tensor of shape (N, C).
@@ -238,29 +234,22 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         >>> loss(logits, labels)
     """
     def __init__(self,
-                 is_grad=True,
                  sparse=False,
-                 reduction='none',
-                 smooth_factor=0,
-                 num_classes=2):
+                 reduction='none'):
         super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction)
-        self.is_grad = is_grad
         self.sparse = sparse
-        validator.check_number_range(
-            "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name)
-        self.smooth_factor = smooth_factor
-        self.num_classes = num_classes
+        self.reduction = reduction
         self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits()
         self.one_hot = P.OneHot()
-        self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32)
-        self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32)
+        self.on_value = Tensor(1.0, mstype.float32)
+        self.off_value = Tensor(0., mstype.float32)
         self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"]
         if self.is_cpugpu:
-            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits(is_grad=self.is_grad)
+            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits()
 
     def construct(self, logits, labels):
-        if self.is_cpugpu and self.sparse:
+        if self.is_cpugpu and self.sparse and self.reduction == 'mean':
             x = self.sparse_softmax_cross_entropy(logits, labels)
             return x
diff --git a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
index d808ed304ba..35c87d4f1b8 100644
--- a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
+++ b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
@@ -115,7 +115,7 @@ class UncertaintyEvaluation:
             self.epi_uncer_model = EpistemicUncertaintyModel(self.epi_model)
             if self.epi_uncer_model.drop_count == 0:
                 if self.task_type == 'classification':
-                    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+                    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
                     net_opt = Adam(self.epi_uncer_model.trainable_params())
                     model = Model(self.epi_uncer_model, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
                 else:
@@ -314,7 +314,7 @@ class AleatoricLoss(Cell):
             self.exp = P.Exp()
             self.normal = C.normal
             self.to_tensor = P.ScalarToArray()
-            self.entropy = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+            self.entropy = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         else:
             self.mean = P.ReduceMean()
             self.exp = P.Exp()
diff --git a/model_zoo/official/cv/alexnet/eval.py b/model_zoo/official/cv/alexnet/eval.py
index 6a091aedd89..7eb7905d38b 100644
--- a/model_zoo/official/cv/alexnet/eval.py
+++ b/model_zoo/official/cv/alexnet/eval.py
@@ -42,7 +42,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
 
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
diff --git a/model_zoo/official/cv/alexnet/train.py b/model_zoo/official/cv/alexnet/train.py
index 4512244b922..83b2f9d3a19 100644
--- a/model_zoo/official/cv/alexnet/train.py
+++ b/model_zoo/official/cv/alexnet/train.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
     ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, 1)
 
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size()))
     opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
diff --git a/model_zoo/official/cv/googlenet/eval.py b/model_zoo/official/cv/googlenet/eval.py
index 31646c97135..4118a7294fd 100644
--- a/model_zoo/official/cv/googlenet/eval.py
+++ b/model_zoo/official/cv/googlenet/eval.py
@@ -41,7 +41,7 @@ if __name__ == '__main__':
     net = GoogleNet(num_classes=cfg.num_classes)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
 
     if device_target == "Ascend":
diff --git a/model_zoo/official/cv/googlenet/train.py b/model_zoo/official/cv/googlenet/train.py
index 5181f9c484a..78d4ec28f43 100644
--- a/model_zoo/official/cv/googlenet/train.py
+++ b/model_zoo/official/cv/googlenet/train.py
@@ -101,7 +101,7 @@ if __name__ == '__main__':
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
     if device_target == "Ascend":
         model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
diff --git a/model_zoo/official/cv/lenet/eval.py b/model_zoo/official/cv/lenet/eval.py
index bcd5503c399..69525d853f8 100644
--- a/model_zoo/official/cv/lenet/eval.py
+++ b/model_zoo/official/cv/lenet/eval.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
 
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
diff --git a/model_zoo/official/cv/lenet/train.py b/model_zoo/official/cv/lenet/train.py
index 2c45c5b3274..4dd09b16521 100644
--- a/model_zoo/official/cv/lenet/train.py
+++ b/model_zoo/official/cv/lenet/train.py
@@ -50,7 +50,7 @@ if __name__ == "__main__":
                                cfg.batch_size)
 
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
diff --git a/model_zoo/official/cv/lenet_quant/eval_quant.py b/model_zoo/official/cv/lenet_quant/eval_quant.py
index f545a8a23a8..5b23b98ad23 100644
--- a/model_zoo/official/cv/lenet_quant/eval_quant.py
+++ b/model_zoo/official/cv/lenet_quant/eval_quant.py
@@ -53,7 +53,7 @@ if __name__ == "__main__":
                                            per_channel=[True, False])
 
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
diff --git a/model_zoo/official/cv/lenet_quant/train_quant.py b/model_zoo/official/cv/lenet_quant/train_quant.py
index 51d37cc1bfa..9d5462c37e7 100644
--- a/model_zoo/official/cv/lenet_quant/train_quant.py
+++ b/model_zoo/official/cv/lenet_quant/train_quant.py
@@ -62,7 +62,7 @@ if __name__ == "__main__":
                                            symmetric=[False, False])
 
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
diff --git a/model_zoo/official/cv/mobilenetv2/eval.py b/model_zoo/official/cv/mobilenetv2/eval.py
index 897e7ffe274..e4ac99013ca 100644
--- a/model_zoo/official/cv/mobilenetv2/eval.py
+++ b/model_zoo/official/cv/mobilenetv2/eval.py
@@ -51,8 +51,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
     if args_opt.device_target == "Ascend":
         net.to_float(mstype.float16)
diff --git a/model_zoo/official/cv/mobilenetv2/train.py b/model_zoo/official/cv/mobilenetv2/train.py
index 4fb800d6ddd..75255d90e06 100644
--- a/model_zoo/official/cv/mobilenetv2/train.py
+++ b/model_zoo/official/cv/mobilenetv2/train.py
@@ -172,7 +172,7 @@ if __name__ == '__main__':
             loss = CrossEntropyWithLabelSmooth(smooth_factor=config_gpu.label_smooth,
                                                num_classes=config_gpu.num_classes)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         # define dataset
         epoch_size = config_gpu.epoch_size
         dataset = create_dataset(dataset_path=args_opt.dataset_path,
@@ -236,8 +236,7 @@ if __name__ == '__main__':
             loss = CrossEntropyWithLabelSmooth(
                 smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(
-                is_grad=False, sparse=True, reduction='mean')
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                  do_train=True,
                                  config=config_ascend,
diff --git a/model_zoo/official/cv/mobilenetv2_quant/eval.py b/model_zoo/official/cv/mobilenetv2_quant/eval.py
index e6b0875c75f..d00fada259f 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/eval.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/eval.py
@@ -55,7 +55,7 @@ if __name__ == '__main__':
     # convert fusion network to quantization aware network
     network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
 
     # define network loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/mobilenetv2_quant/train.py b/model_zoo/official/cv/mobilenetv2_quant/train.py
index ebe60996cf9..5413619c850 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/train.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/train.py
@@ -89,7 +89,7 @@ def train_on_ascend():
     if config.label_smooth > 0:
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                            num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
                              do_train=True,
@@ -150,7 +150,7 @@ def train_on_gpu():
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                            num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/mobilenetv3/eval.py b/model_zoo/official/cv/mobilenetv3/eval.py
index 43ebb1f21a7..d7e076490f2 100644
--- a/model_zoo/official/cv/mobilenetv3/eval.py
+++ b/model_zoo/official/cv/mobilenetv3/eval.py
@@ -41,8 +41,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net = mobilenet_v3_large(num_classes=config.num_classes)
 
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/mobilenetv3/train.py b/model_zoo/official/cv/mobilenetv3/train.py
index 60f3723244e..fc49582a574 100644
--- a/model_zoo/official/cv/mobilenetv3/train.py
+++ b/model_zoo/official/cv/mobilenetv3/train.py
@@ -162,8 +162,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(
             smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(
-            is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config_gpu.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/resnet/eval.py b/model_zoo/official/cv/resnet/eval.py
index f7f0b593aea..570a26ee5cb 100755
--- a/model_zoo/official/cv/resnet/eval.py
+++ b/model_zoo/official/cv/resnet/eval.py
@@ -22,6 +22,7 @@ from mindspore import dataset as de
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from src.CrossEntropySmooth import CrossEntropySmooth
 
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -79,8 +80,8 @@ if __name__ == '__main__':
     if args_opt.dataset == "imagenet2012":
         if not config.use_label_smooth:
             config.label_smooth_factor = 0.0
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                             smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+        loss = CrossEntropySmooth(sparse=True, reduction='mean',
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     else:
         loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
diff --git a/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py b/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py
new file mode 100644
index 00000000000..bf38c6e77b0
--- /dev/null
+++ b/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py
@@ -0,0 +1,38 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""define loss function for network"""
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
+
+
+class CrossEntropySmooth(_Loss):
+    """CrossEntropy"""
+    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
+        super(CrossEntropySmooth, self).__init__()
+        self.onehot = P.OneHot()
+        self.sparse = sparse
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)
+
+    def construct(self, logit, label):
+        if self.sparse:
+            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        loss = self.ce(logit, label)
+        return loss
diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py
index 0a891b91638..e7f231152cb 100755
--- a/model_zoo/official/cv/resnet/train.py
+++ b/model_zoo/official/cv/resnet/train.py
@@ -31,6 +31,7 @@ from mindspore.communication.management import init, get_rank, get_group_size
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from src.lr_generator import get_lr, warmup_cosine_annealing_lr
+from src.CrossEntropySmooth import CrossEntropySmooth
 
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -145,8 +146,8 @@ if __name__ == '__main__':
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                                 smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+            loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
         else:
             loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
@@ -157,11 +158,10 @@ if __name__ == '__main__':
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                                 smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+            loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                                 num_classes=config.class_num)
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         if args_opt.net == "resnet101" or args_opt.net == "resnet50":
             opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                            config.weight_decay,
diff --git a/model_zoo/official/cv/vgg16/eval.py b/model_zoo/official/cv/vgg16/eval.py
index e0e9fd1fd0b..be9e6cbe123 100644
--- a/model_zoo/official/cv/vgg16/eval.py
+++ b/model_zoo/official/cv/vgg16/eval.py
@@ -134,7 +134,7 @@ def test(cloud_args=None):
     net = vgg16(num_classes=args.num_classes, args=args)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, args.momentum,
                    weight_decay=args.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
 
     param_dict = load_checkpoint(args.pre_trained)
diff --git a/model_zoo/official/cv/vgg16/train.py b/model_zoo/official/cv/vgg16/train.py
index ae2f934e1e7..aeae4584330 100644
--- a/model_zoo/official/cv/vgg16/train.py
+++ b/model_zoo/official/cv/vgg16/train.py
@@ -210,7 +210,7 @@ if __name__ == '__main__':
                    loss_scale=args.loss_scale)
 
     if args.dataset == "cifar10":
-        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'},
                       amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
     else:
diff --git a/model_zoo/official/nlp/lstm/eval.py b/model_zoo/official/nlp/lstm/eval.py
index 6d731fbd0df..8bb139c65ca 100644
--- a/model_zoo/official/nlp/lstm/eval.py
+++ b/model_zoo/official/nlp/lstm/eval.py
@@ -64,7 +64,7 @@ if __name__ == '__main__':
                            weight=Tensor(embedding_table),
                            batch_size=cfg.batch_size)
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()
diff --git a/model_zoo/official/nlp/lstm/train.py b/model_zoo/official/nlp/lstm/train.py
index 53c3a89a6a3..7fa625db04d 100644
--- a/model_zoo/official/nlp/lstm/train.py
+++ b/model_zoo/official/nlp/lstm/train.py
@@ -70,7 +70,7 @@ if __name__ == '__main__':
     if args.pre_trained:
         load_param_into_net(network, load_checkpoint(args.pre_trained))
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()
diff --git a/tests/st/fusion/test_conv_bn1_fusion.py b/tests/st/fusion/test_conv_bn1_fusion.py
index 905179ee30c..51d1fac71b4 100644
--- a/tests/st/fusion/test_conv_bn1_fusion.py
+++ b/tests/st/fusion/test_conv_bn1_fusion.py
@@ -39,7 +39,7 @@ class MsWrapper(nn.Cell):
 
 
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                       filter(lambda x: x.requires_grad, net.get_parameters()))
     context.set_context(mode=context.GRAPH_MODE)
diff --git a/tests/st/host_device/test_host_device_lenet.py b/tests/st/host_device/test_host_device_lenet.py
index 0a312a34221..80bf7b578a4 100644
--- a/tests/st/host_device/test_host_device_lenet.py
+++ b/tests/st/host_device/test_host_device_lenet.py
@@ -66,7 +66,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py
index 37fd6363c06..3d7dada980d 100644
--- a/tests/st/nccl/test_nccl_lenet.py
+++ b/tests/st/nccl/test_nccl_lenet.py
@@ -85,7 +85,7 @@ def test_lenet_nccl():
     learning_rate = multisteplr(epoch, 2)
     momentum = 0.9
     mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, mom_optimizer)
     train_network.set_train()
diff --git a/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py b/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py
new file mode 100644
index 00000000000..bf38c6e77b0
--- /dev/null
+++ b/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py
@@ -0,0 +1,38 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""define loss function for network"""
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
+
+
+class CrossEntropySmooth(_Loss):
+    """CrossEntropy"""
+    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
+        super(CrossEntropySmooth, self).__init__()
+        self.onehot = P.OneHot()
+        self.sparse = sparse
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)
+
+    def construct(self, logit, label):
+        if self.sparse:
+            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        loss = self.ce(logit, label)
+        return loss
diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
index 220b9862085..26236a289a5 100644
--- a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
+++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
@@ -35,12 +35,12 @@ from tests.st.networks.models.resnet50.src.dataset import create_dataset
 from tests.st.networks.models.resnet50.src.lr_generator import get_learning_rate
 from tests.st.networks.models.resnet50.src.config import config
 from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell
+from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
 from tests.st.networks.models.resnet50.src_thor.config import config as thor_config
 from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model
 from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor
 from tests.st.networks.models.resnet50.src_thor.thor import THOR
 
-
 MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_1.json"
 MINDSPORE_HCCL_CONFIG_PATH_2 = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_2.json"
 dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train"
@@ -150,8 +150,8 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl):
         config.label_smooth_factor = 0.0
 
     # loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
-                                            num_classes=config.class_num)
+    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
+                              num_classes=config.class_num)
 
     # train dataset
     dataset = create_dataset(dataset_path=dataset_path, do_train=True,
@@ -259,9 +259,8 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
         thor_config.label_smooth_factor = 0.0
 
     # loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                            smooth_factor=thor_config.label_smooth_factor,
-                                            num_classes=thor_config.class_num)
+    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=thor_config.label_smooth_factor,
+                              num_classes=thor_config.class_num)
 
     # train dataset
     dataset = create_dataset(dataset_path=dataset_path, do_train=True,
diff --git a/tests/st/networks/test_cpu_lenet.py b/tests/st/networks/test_cpu_lenet.py
index 9a11b23c87a..6d25e6a4713 100644
--- a/tests/st/networks/test_cpu_lenet.py
+++ b/tests/st/networks/test_cpu_lenet.py
@@ -60,7 +60,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/networks/test_gpu_alexnet.py b/tests/st/networks/test_gpu_alexnet.py
index 7a55006571e..4ade9de314c 100644
--- a/tests/st/networks/test_gpu_alexnet.py
+++ b/tests/st/networks/test_gpu_alexnet.py
@@ -78,7 +78,7 @@ def test_trainTensor(num_classes=10, epoch=15, batch_size=32):
     lr = 0.1
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum, weight_decay=0.0001)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)
     train_network.set_train()
diff --git a/tests/st/networks/test_gpu_lenet.py b/tests/st/networks/test_gpu_lenet.py
index 4677c7ad008..ad77a691707 100644
--- a/tests/st/networks/test_gpu_lenet.py
+++ b/tests/st/networks/test_gpu_lenet.py
@@ -136,7 +136,7 @@ def test_train_lenet():
     learning_rate = multisteplr(epoch, 30)
 
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
@@ -192,7 +192,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
 def test_train_and_eval_lenet():
     context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
     network = LeNet5(10)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
diff --git a/tests/st/networks/test_gpu_lstm.py b/tests/st/networks/test_gpu_lstm.py
index bc59b7e3872..5604d9dd3dc 100644
--- a/tests/st/networks/test_gpu_lstm.py
+++ b/tests/st/networks/test_gpu_lstm.py
@@ -130,7 +130,7 @@ def test_LSTM():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/networks/test_gpu_resnet.py b/tests/st/networks/test_gpu_resnet.py
index d440c5cacba..8444bd55c40 100644
--- a/tests/st/networks/test_gpu_resnet.py
+++ b/tests/st/networks/test_gpu_resnet.py
@@ -337,7 +337,7 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)  # optimizer
@@ -361,7 +361,7 @@ def test_trainTensor_big_batchSize(num_classes=10, epoch=8, batch_size=338):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)  # optimizer
@@ -385,7 +385,7 @@ def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     train_network = amp.build_train_network(
         net, optimizer, criterion, level="O2")
     train_network.set_train()
diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py
index a05798bfbec..1a8fed1fc11 100644
--- a/tests/st/networks/test_network_main.py
+++ b/tests/st/networks/test_network_main.py
@@ -39,7 +39,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/cpu/test_momentum_op.py b/tests/st/ops/cpu/test_momentum_op.py
index 717925c23e6..b35ec5da4ed 100644
--- a/tests/st/ops/cpu/test_momentum_op.py
+++ b/tests/st/ops/cpu/test_momentum_op.py
@@ -52,7 +52,7 @@ def test_momentum():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/gpu/test_adam_op.py b/tests/st/ops/gpu/test_adam_op.py
index 6e2bb0ddab3..8c2e16e6386 100644
--- a/tests/st/ops/gpu/test_adam_op.py
+++ b/tests/st/ops/gpu/test_adam_op.py
@@ -49,7 +49,7 @@ def test_adam():
     net = NetAdam()
     optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.01)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)
diff --git a/tests/st/ops/gpu/test_ftrl_op.py b/tests/st/ops/gpu/test_ftrl_op.py
index 55d5972c20f..e9518f7762f 100644
--- a/tests/st/ops/gpu/test_ftrl_op.py
+++ b/tests/st/ops/gpu/test_ftrl_op.py
@@ -49,7 +49,7 @@ def test_ftrl():
     net = NetFtrl()
     optimizer = FTRL(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.01)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)
diff --git a/tests/st/ops/gpu/test_momentum_op.py b/tests/st/ops/gpu/test_momentum_op.py
index 48b1ed3380f..51ec0ffc7aa 100644
--- a/tests/st/ops/gpu/test_momentum_op.py
+++ b/tests/st/ops/gpu/test_momentum_op.py
@@ -52,7 +52,7 @@ def test_momentum():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/gpu/test_sgd_op.py b/tests/st/ops/gpu/test_sgd_op.py
index 85d470f50da..f959d879cb2 100644
--- a/tests/st/ops/gpu/test_sgd_op.py
+++ b/tests/st/ops/gpu/test_sgd_op.py
@@ -55,7 +55,7 @@ def test_SGD():
     optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum, dampening,
                     weight_decay, nesterov, loss_scale)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py b/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py
index d18eeeb0ad5..c677d8c79f3 100644
--- a/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py
+++ b/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py
@@ -20,15 +20,13 @@ import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor
 
-
 class NetSparseSoftmaxCrossEntropyWithLogits(nn.Cell):
     def __init__(self):
         super(NetSparseSoftmaxCrossEntropyWithLogits, self).__init__()
-        self.loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
-        self.dlogits = nn.SoftmaxCrossEntropyWithLogits(is_grad=True, sparse=True)
+        self.loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
 
     def construct(self, logits, labels):
-        return (self.loss(logits, labels), self.dlogits(logits, labels))
+        return self.loss(logits, labels)
 
 
 @pytest.mark.level0
@@ -39,29 +37,18 @@ def test_sparse_softmax_cross_entropy_with_logits():
                              [1, 10, 1],
                              [10, 1, 1]]).astype(np.float32))
     labels = Tensor(np.array([2, 1, 0]).astype(np.int32))
-    expect_loss = 0.0002467
-    expect_dlogits = np.array([[4.1126452e-05, 4.1126452e-05, -8.2234539e-05],
-                               [4.1126452e-05, -8.2234539e-05, 4.1126452e-05],
-                               [-8.2234539e-05, 4.1126452e-05, 4.1126452e-05]]).astype(np.float32)
+    expect_loss = [0.00024673, 0.00024673, 0.00024673]
 
     context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
     sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits()
     output = sparse_softmax_cross_entropy_with_logits(logits, labels)
     error0 = 1.0e-6
-    diff0 = output[0].asnumpy() - expect_loss
+    diff0 = output.asnumpy() - expect_loss
     assert np.all(abs(diff0) < error0)
 
-    error1 = np.ones(shape=[3, 3]) * 1.0e-6
-    diff1 = output[1].asnumpy() - expect_dlogits
-    assert np.all(abs(diff1) < error1)
-
     context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
     sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits()
     output = sparse_softmax_cross_entropy_with_logits(logits, labels)
     error0 = 1.0e-6
-    diff0 = output[0].asnumpy() - expect_loss
+    diff0 = output.asnumpy() - expect_loss
     assert np.all(abs(diff0) < error0)
-
-    error1 = np.ones(shape=[3, 3]) * 1.0e-6
-    diff1 = output[1].asnumpy() - expect_dlogits
-    assert np.all(abs(diff1) < error1)
diff --git a/tests/st/probability/test_bnn_layer.py b/tests/st/probability/test_bnn_layer.py
index 742b17c2688..cdc16908c30 100644
--- a/tests/st/probability/test_bnn_layer.py
+++ b/tests/st/probability/test_bnn_layer.py
@@ -124,7 +124,7 @@ def validate_model(net, dataset):
 
 if __name__ == "__main__":
     network = BNNLeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
 
     net_with_loss = bnn_layers.WithBNNLossCell(network, criterion, 60000, 0.000001)
diff --git a/tests/st/probability/test_transform_bnn_layer.py b/tests/st/probability/test_transform_bnn_layer.py
index 3fd4bfd4001..52f0edffa78 100644
--- a/tests/st/probability/test_transform_bnn_layer.py
+++ b/tests/st/probability/test_transform_bnn_layer.py
@@ -125,7 +125,7 @@ def validate_model(net, dataset):
 
 if __name__ == "__main__":
     network = LeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
 
     net_with_loss = WithLossCell(network, criterion)
diff --git a/tests/st/probability/test_transform_bnn_model.py b/tests/st/probability/test_transform_bnn_model.py
index 5cc7733e891..008802b3d5e 100644
--- a/tests/st/probability/test_transform_bnn_model.py
+++ b/tests/st/probability/test_transform_bnn_model.py
@@ -124,7 +124,7 @@ def validate_model(net, dataset):
 
 if __name__ == "__main__":
     network = LeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
 
     net_with_loss = WithLossCell(network, criterion)
diff --git a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
index a596e13c0f3..aecf8d781d5 100644
--- a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
+++ b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
@@ -73,9 +73,7 @@ def do_sparse_embedding(ps=False):
     optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()))
     optimizer.sparse_opt.add_prim_attr("primitive_target", "CPU")
-    criterion = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction="mean"
-    )
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)
     train_network.set_train()
diff --git a/tests/st/ps/full_ps/test_full_ps_lenet.py b/tests/st/ps/full_ps/test_full_ps_lenet.py
index fbf48e5fb86..aca875f6fcc 100644
--- a/tests/st/ps/full_ps/test_full_ps_lenet.py
+++ b/tests/st/ps/full_ps/test_full_ps_lenet.py
@@ -123,7 +123,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
 if __name__ == "__main__":
     network = LeNet5(10)
     network.set_param_ps()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
diff --git a/tests/st/ps/multi_full_ps/test_multi_full_ps.py b/tests/st/ps/multi_full_ps/test_multi_full_ps.py
index 30bf6176923..f53063a9a63 100644
--- a/tests/st/ps/multi_full_ps/test_multi_full_ps.py
+++ b/tests/st/ps/multi_full_ps/test_multi_full_ps.py
@@ -94,9 +94,7 @@ if __name__ == "__main__":
     np.random.seed(0)
     network = LeNet5(10)
     network.set_param_ps()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction="mean"
-    )
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     if device_target == "GPU":
         context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True,
                                           device_num=get_group_size())
diff --git a/tests/st/pynative/test_pynative_hook.py b/tests/st/pynative/test_pynative_hook.py
index 99688697aef..e5cc6240144 100644
--- a/tests/st/pynative/test_pynative_hook.py
+++ b/tests/st/pynative/test_pynative_hook.py
@@ -159,7 +159,7 @@ def test_pynative_lenet_train_hook_function_print_and_save_grad():
                           cell_hook_function_print_grad)
     net = LeNet5(hook_function=function[0], cell_hook_function=function[1])
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = GradWrap(net_with_criterion)
     train_network.set_train()
diff --git a/tests/st/pynative/test_pynative_mindarmour.py b/tests/st/pynative/test_pynative_mindarmour.py
index 23e7b2d042d..dc52506dc8c 100644
--- a/tests/st/pynative/test_pynative_mindarmour.py
+++ b/tests/st/pynative/test_pynative_mindarmour.py
@@ -145,14 +145,14 @@ def test_multi_grads():
     net = LeNet()
 
     # grad operation
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     with_loss_cell = WithLossCell(net, loss_fn)
     grad_all = GradWrapWithLoss(with_loss_cell)
     grad_out = grad_all(Tensor(inputs_np), Tensor(labels_np)).asnumpy()
     assert np.any(grad_out != 0), 'grad result can not be all zeros'
 
     # train-one-step operation
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          0.01, 0.9)
     loss_net = WithLossCell(net, loss_fn)
diff --git a/tests/st/quantization/lenet_quant/test_lenet_quant.py b/tests/st/quantization/lenet_quant/test_lenet_quant.py
index 361aa1abf62..1d1e8fb94a5 100644
--- a/tests/st/quantization/lenet_quant/test_lenet_quant.py
+++ b/tests/st/quantization/lenet_quant/test_lenet_quant.py
@@ -42,7 +42,7 @@ def train_lenet():
                              cfg.batch_size)
 
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
@@ -74,7 +74,7 @@ def train_lenet_quant():
                                            symmetric=[False, False])
 
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
@@ -104,7 +104,7 @@ def eval_quant():
                                            per_channel=[True, False])
 
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
diff --git a/tests/st/summary/test_summary.py b/tests/st/summary/test_summary.py
index b81d15514af..7aa5d95358d 100644
--- a/tests/st/summary/test_summary.py
+++ b/tests/st/summary/test_summary.py
@@ -154,7 +154,7 @@ class TestSummary:
 
     def _run_network(self, dataset_sink_mode=True):
         lenet = LeNet5()
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
         model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()})
         summary_dir = tempfile.mkdtemp(dir=self.base_summary_dir)
diff --git a/tests/ut/python/exec/test_train.py b/tests/ut/python/exec/test_train.py
index 2cd9b9cad47..618ad3c0341 100644
--- a/tests/ut/python/exec/test_train.py
+++ b/tests/ut/python/exec/test_train.py
@@ -31,7 +31,7 @@ def lr_gen(fn, epoch_size):
 
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
     """me_train_tensor"""
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr_gen(lambda i: 0.1, epoch_size),
                    0.9, 0.01, 1024)
     Model(net, loss, opt)
diff --git a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py
index b09584f2989..04087cb0f0a 100644
--- a/tests/ut/python/exec/test_train_with_lars.py
+++ b/tests/ut/python/exec/test_train_with_lars.py
@@ -78,7 +78,7 @@ def lr_gen(fn, epoch_size):
 
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
     """me_train_tensor"""
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # reorder the net parameters , leave the parameters that need to be passed into lars to the end part
     opt = Momentum(get_net_trainable_reordered_params(net)[2], lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py
index c93df7ffb1b..3ce0b274b60 100644
--- a/tests/ut/python/parallel/test_allreduce_fusion.py
+++ b/tests/ut/python/parallel/test_allreduce_fusion.py
@@ -113,7 +113,7 @@ def train_common(net):
     label = Tensor(np.ones([batch_size]), dtype=ms.int32)
     dataset = Dataset(predict, label, 2)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py
index 96ff8435046..4f1794484d1 100644
--- a/tests/ut/python/parallel/test_alltoall.py
+++ b/tests/ut/python/parallel/test_alltoall.py
@@ -78,7 +78,7 @@ def all_to_all_common(strategy1):
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net(strategy1)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     loss.one_hot.set_strategy(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py
index 21d5003b4c0..32e597d8873 100644
--- a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py
+++ b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py
@@ -133,7 +133,7 @@ def test_batchnorm_batch_parallel():
     dataset = DatasetLenet(predict, label, 2)
     net = batchnorm_net(num_classes)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_bn_prelu_cell.py b/tests/ut/python/parallel/test_bn_prelu_cell.py
index 07f5d3906be..354add8511b 100644
--- a/tests/ut/python/parallel/test_bn_prelu_cell.py
+++ b/tests/ut/python/parallel/test_bn_prelu_cell.py
@@ -209,7 +209,7 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None):
     dataset = Dataset(predict, label, 2)
     net = bn_net()
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(strategy_loss)
     opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size)
diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py
index 0e70b2513c2..baab850b84a 100644
--- a/tests/ut/python/parallel/test_dataset_interface.py
+++ b/tests/ut/python/parallel/test_dataset_interface.py
@@ -79,7 +79,7 @@ def loss_scale_manager_common(strategy1):
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net(strategy1)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     scale_manager = DynamicLossScaleManager(32, 2, 2000)
diff --git a/tests/ut/python/parallel/test_full_batch.py b/tests/ut/python/parallel/test_full_batch.py
index 70a68a5b00c..68e77f04608 100644
--- a/tests/ut/python/parallel/test_full_batch.py
+++ b/tests/ut/python/parallel/test_full_batch.py
@@ -75,7 +75,7 @@ def all_to_all_common(strategy1):
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net(strategy1)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     loss.one_hot.set_strategy(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_one_dev.py b/tests/ut/python/parallel/test_one_dev.py
index 056f4a15c75..7f10e3dc6ae 100644
--- a/tests/ut/python/parallel/test_one_dev.py
+++ b/tests/ut/python/parallel/test_one_dev.py
@@ -81,7 +81,7 @@ def all_to_all_common():
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net()
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
diff --git a/tests/ut/python/parallel/test_operator_model_parallel.py b/tests/ut/python/parallel/test_operator_model_parallel.py
index 788521c5258..8cbfa14a91d 100644
--- a/tests/ut/python/parallel/test_operator_model_parallel.py
+++ b/tests/ut/python/parallel/test_operator_model_parallel.py
@@ -361,7 +361,7 @@ def test_resnet_operator_batch_parallel():
     dataset = DatasetLenet(predict, label, 2)
     net = resnet_operator_net(num_classes)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
@@ -386,7 +386,7 @@ def test_resnet_model_parallel():
     dataset = DatasetLenet(predict, label, 2)
     net = resnet_model_parallel_net(num_classes)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_prelu_cell.py b/tests/ut/python/parallel/test_prelu_cell.py
index dca467ef8df..59d14359c19 100644
--- a/tests/ut/python/parallel/test_prelu_cell.py
+++ b/tests/ut/python/parallel/test_prelu_cell.py
@@ -107,7 +107,7 @@ def reshape_common(parallel_mode):
     dataset = Dataset(predict, label, 2)
     net = prelu_net()
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
     model.train(epoch_size, dataset, dataset_sink_mode=False)
diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py
index 9cfb376e1b6..28a1a27f3f0 100644
--- a/tests/ut/python/parallel/test_reshape.py
+++ b/tests/ut/python/parallel/test_reshape.py
@@ -94,7 +94,7 @@ def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss
     dataset = Dataset(predict, label, 2)
     net = reshape_net(strategy0, strategy1, strategy2)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(strategy_loss)
     loss.one_hot.set_strategy(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_transpose.py b/tests/ut/python/parallel/test_transpose.py
index b0b917bf191..300b5dae2e2 100644
--- a/tests/ut/python/parallel/test_transpose.py
+++ b/tests/ut/python/parallel/test_transpose.py
@@ -79,7 +79,7 @@ def transpose_common(strategy1, strategy2):
     dataset = Dataset(predict, label, 2)
     net = transpose_net(strategy1, strategy2)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     context.set_context(mode=context.GRAPH_MODE)
diff --git a/tests/ut/python/pynative_mode/test_hook.py b/tests/ut/python/pynative_mode/test_hook.py
index 6c2204f3810..a138e6b098c 100644
--- a/tests/ut/python/pynative_mode/test_hook.py
+++ b/tests/ut/python/pynative_mode/test_hook.py
@@ -141,7 +141,7 @@ class GradWrap(nn.Cell):
 def test_hook():
     net = LeNet5()
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = GradWrap(net_with_criterion)
     train_network.set_train()
diff --git a/tests/ut/python/pynative_mode/test_pynative_model.py b/tests/ut/python/pynative_mode/test_pynative_model.py
index a0469cdaf4b..ea40227e554 100644
--- a/tests/ut/python/pynative_mode/test_pynative_model.py
+++ b/tests/ut/python/pynative_mode/test_pynative_model.py
@@ -129,7 +129,7 @@ def test_lenet_grad():
     verification_step = 0
 
     net = LeNet5()
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits()
     momen_opti = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
     train_net = GradWrap(NetWithLossClass(net))
     train_net.set_train()
diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py
index dae05e98302..56c18508488 100644
--- a/tests/ut/python/utils/test_serialize.py
+++ b/tests/ut/python/utils/test_serialize.py
@@ -282,7 +282,7 @@ def test_load_param_into_net():
 
 def test_exec_save_checkpoint():
     net = Net()
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = Momentum(net.trainable_params(), 0.0, 0.9, 0.0001, 1024)
 
     loss_net = WithLossCell(net, loss)
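
Note: label smoothing now lives in the CrossEntropySmooth cell added under
src/ for the resnet model and the resnet50 test model. A minimal usage
sketch; the smooth_factor and num_classes values below are illustrative,
not taken from any config in this patch:

    from src.CrossEntropySmooth import CrossEntropySmooth

    # puts 1 - smooth_factor on the true class and
    # smooth_factor / (num_classes - 1) on the rest via OneHot on/off values
    loss = CrossEntropySmooth(sparse=True, reduction="mean",
                              smooth_factor=0.1, num_classes=1000)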