From 0ec70068ae9ad50f8b6d488e38c54af7af432a8e Mon Sep 17 00:00:00 2001
From: wanyiming
Date: Sat, 29 Aug 2020 15:50:37 +0800
Subject: [PATCH] mod_SoftmaxCrossEntropyWithLogits

---
 mindspore/nn/loss/loss.py | 23 +++--------
 .../toolbox/uncertainty_evaluation.py | 4 +-
 model_zoo/official/cv/alexnet/eval.py | 2 +-
 model_zoo/official/cv/alexnet/train.py | 2 +-
 model_zoo/official/cv/googlenet/eval.py | 2 +-
 model_zoo/official/cv/googlenet/train.py | 2 +-
 model_zoo/official/cv/lenet/eval.py | 2 +-
 model_zoo/official/cv/lenet/train.py | 2 +-
 .../official/cv/lenet_quant/eval_quant.py | 2 +-
 .../official/cv/lenet_quant/train_quant.py | 2 +-
 model_zoo/official/cv/mobilenetv2/eval.py | 3 +-
 model_zoo/official/cv/mobilenetv2/train.py | 5 +--
 .../official/cv/mobilenetv2_quant/eval.py | 2 +-
 .../official/cv/mobilenetv2_quant/train.py | 4 +-
 model_zoo/official/cv/mobilenetv3/eval.py | 3 +-
 model_zoo/official/cv/mobilenetv3/train.py | 3 +-
 model_zoo/official/cv/resnet/eval.py | 5 ++-
 .../cv/resnet/src/CrossEntropySmooth.py | 38 +++++++++++++++++++
 model_zoo/official/cv/resnet/train.py | 12 +++---
 model_zoo/official/cv/vgg16/eval.py | 2 +-
 model_zoo/official/cv/vgg16/train.py | 2 +-
 model_zoo/official/nlp/lstm/eval.py | 2 +-
 model_zoo/official/nlp/lstm/train.py | 2 +-
 tests/st/fusion/test_conv_bn1_fusion.py | 2 +-
 .../st/host_device/test_host_device_lenet.py | 2 +-
 tests/st/nccl/test_nccl_lenet.py | 2 +-
 .../models/resnet50/src/CrossEntropySmooth.py | 38 +++++++++++++++++++
 .../models/resnet50/test_resnet50_imagenet.py | 11 +++---
 tests/st/networks/test_cpu_lenet.py | 2 +-
 tests/st/networks/test_gpu_alexnet.py | 2 +-
 tests/st/networks/test_gpu_lenet.py | 4 +-
 tests/st/networks/test_gpu_lstm.py | 2 +-
 tests/st/networks/test_gpu_resnet.py | 6 +--
 tests/st/networks/test_network_main.py | 2 +-
 tests/st/ops/cpu/test_momentum_op.py | 2 +-
 tests/st/ops/gpu/test_adam_op.py | 2 +-
 tests/st/ops/gpu/test_ftrl_op.py | 2 +-
 tests/st/ops/gpu/test_momentum_op.py | 2 +-
 tests/st/ops/gpu/test_sgd_op.py | 2 +-
 ...se_softmax_cross_entropy_with_logits_op.py | 23 +++--------
 tests/st/probability/test_bnn_layer.py | 2 +-
 .../probability/test_transform_bnn_layer.py | 2 +-
 .../probability/test_transform_bnn_model.py | 2 +-
 .../test_cmp_sparse_embedding.py | 4 +-
 tests/st/ps/full_ps/test_full_ps_lenet.py | 2 +-
 .../st/ps/multi_full_ps/test_multi_full_ps.py | 4 +-
 tests/st/pynative/test_pynative_hook.py | 2 +-
 tests/st/pynative/test_pynative_mindarmour.py | 4 +-
 .../lenet_quant/test_lenet_quant.py | 6 +--
 tests/st/summary/test_summary.py | 2 +-
 tests/ut/python/exec/test_train.py | 2 +-
 tests/ut/python/exec/test_train_with_lars.py | 2 +-
 .../python/parallel/test_allreduce_fusion.py | 2 +-
 tests/ut/python/parallel/test_alltoall.py | 2 +-
 .../parallel/test_batchnorm_batch_parallel.py | 2 +-
 .../ut/python/parallel/test_bn_prelu_cell.py | 2 +-
 .../python/parallel/test_dataset_interface.py | 2 +-
 tests/ut/python/parallel/test_full_batch.py | 2 +-
 tests/ut/python/parallel/test_one_dev.py | 2 +-
 .../parallel/test_operator_model_parallel.py | 4 +-
 tests/ut/python/parallel/test_prelu_cell.py | 2 +-
 tests/ut/python/parallel/test_reshape.py | 2 +-
 tests/ut/python/parallel/test_transpose.py | 2 +-
 tests/ut/python/pynative_mode/test_hook.py | 2 +-
 .../pynative_mode/test_pynative_model.py | 2 +-
 tests/ut/python/utils/test_serialize.py | 2 +-
 66 files changed, 170 insertions(+), 126 deletions(-)
 create mode 100644 model_zoo/official/cv/resnet/src/CrossEntropySmooth.py
 create mode 100644 tests/st/networks/models/resnet50/src/CrossEntropySmooth.py
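Note: this patch removes the is_grad, smooth_factor and num_classes arguments
from nn.SoftmaxCrossEntropyWithLogits; the gradient variant is selected
internally, and label smoothing moves into a dedicated CrossEntropySmooth
cell. A minimal before/after sketch of the call-site migration applied
throughout model_zoo and tests:

    # before: caller picked the grad variant and smoothing in the loss
    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")

    # after: only sparse/reduction remain
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")

On CPU/GPU the fused sparse kernel is now taken only when reduction == 'mean';
other reductions fall through to the one-hot + softmax cross-entropy path.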
diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py
index 5e0a61e4de8..645e371a977 100644
--- a/mindspore/nn/loss/loss.py
+++ b/mindspore/nn/loss/loss.py
@@ -213,13 +213,9 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         of entry is a valid one.
 
     Args:
-        is_grad (bool): Specifies whether calculate grad only. Default: True.
         sparse (bool): Specifies whether labels use sparse format or not. Default: False.
         reduction (str): Type of reduction to be applied to loss. The optional values are "mean",
             "sum", and "none". If "none", do not perform reduction. Default: "none".
-        smooth_factor (float): Label smoothing factor. It is a optional input which should be in range [0, 1].
-            Default: 0.
-        num_classes (int): The number of classes in the task. It is a optional input Default: 2.
 
     Inputs:
         - **logits** (Tensor) - Tensor of shape (N, C).
@@ -238,29 +234,22 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         >>> loss(logits, labels)
     """
     def __init__(self,
-                 is_grad=True,
                  sparse=False,
-                 reduction='none',
-                 smooth_factor=0,
-                 num_classes=2):
+                 reduction='none'):
         super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction)
-        self.is_grad = is_grad
         self.sparse = sparse
-        validator.check_number_range(
-            "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name)
-        self.smooth_factor = smooth_factor
-        self.num_classes = num_classes
+        self.reduction = reduction
         self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits()
         self.one_hot = P.OneHot()
-        self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32)
-        self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32)
+        self.on_value = Tensor(1.0, mstype.float32)
+        self.off_value = Tensor(0., mstype.float32)
         self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"]
         if self.is_cpugpu:
-            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits(is_grad=self.is_grad)
+            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits()
 
     def construct(self, logits, labels):
-        if self.is_cpugpu and self.sparse:
+        if self.is_cpugpu and self.sparse and self.reduction == 'mean':
             x = self.sparse_softmax_cross_entropy(logits, labels)
             return x
diff --git a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
index d808ed304ba..35c87d4f1b8 100644
--- a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
+++ b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
@@ -115,7 +115,7 @@ class UncertaintyEvaluation:
             self.epi_uncer_model = EpistemicUncertaintyModel(self.epi_model)
             if self.epi_uncer_model.drop_count == 0:
                 if self.task_type == 'classification':
-                    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+                    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
                     net_opt = Adam(self.epi_uncer_model.trainable_params())
                     model = Model(self.epi_uncer_model, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
                 else:
@@ -314,7 +314,7 @@ class AleatoricLoss(Cell):
             self.exp = P.Exp()
             self.normal = C.normal
             self.to_tensor = P.ScalarToArray()
-            self.entropy = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+            self.entropy = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         else:
             self.mean = P.ReduceMean()
             self.exp = P.Exp()
diff --git a/model_zoo/official/cv/alexnet/eval.py b/model_zoo/official/cv/alexnet/eval.py
index 6a091aedd89..7eb7905d38b 100644
--- a/model_zoo/official/cv/alexnet/eval.py
+++ b/model_zoo/official/cv/alexnet/eval.py
@@ -42,7 +42,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
 
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
diff --git a/model_zoo/official/cv/alexnet/train.py b/model_zoo/official/cv/alexnet/train.py
index 4512244b922..83b2f9d3a19 100644
--- a/model_zoo/official/cv/alexnet/train.py
+++ b/model_zoo/official/cv/alexnet/train.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
     ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, 1)
 
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size()))
     opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
diff --git a/model_zoo/official/cv/googlenet/eval.py b/model_zoo/official/cv/googlenet/eval.py
index 31646c97135..4118a7294fd 100644
--- a/model_zoo/official/cv/googlenet/eval.py
+++ b/model_zoo/official/cv/googlenet/eval.py
@@ -41,7 +41,7 @@ if __name__ == '__main__':
     net = GoogleNet(num_classes=cfg.num_classes)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
 
     if device_target == "Ascend":
diff --git a/model_zoo/official/cv/googlenet/train.py b/model_zoo/official/cv/googlenet/train.py
index 5181f9c484a..78d4ec28f43 100644
--- a/model_zoo/official/cv/googlenet/train.py
+++ b/model_zoo/official/cv/googlenet/train.py
@@ -101,7 +101,7 @@ if __name__ == '__main__':
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
     if device_target == "Ascend":
         model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
diff --git a/model_zoo/official/cv/lenet/eval.py b/model_zoo/official/cv/lenet/eval.py
index bcd5503c399..69525d853f8 100644
--- a/model_zoo/official/cv/lenet/eval.py
+++ b/model_zoo/official/cv/lenet/eval.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
 
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
diff --git a/model_zoo/official/cv/lenet/train.py b/model_zoo/official/cv/lenet/train.py
index 2c45c5b3274..4dd09b16521 100644
--- a/model_zoo/official/cv/lenet/train.py
+++ b/model_zoo/official/cv/lenet/train.py
@@ -50,7 +50,7 @@ if __name__ == "__main__":
                                cfg.batch_size)
 
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
diff --git a/model_zoo/official/cv/lenet_quant/eval_quant.py b/model_zoo/official/cv/lenet_quant/eval_quant.py
index f545a8a23a8..5b23b98ad23 100644
--- a/model_zoo/official/cv/lenet_quant/eval_quant.py
+++ b/model_zoo/official/cv/lenet_quant/eval_quant.py
@@ -53,7 +53,7 @@ if __name__ == "__main__":
                                            per_channel=[True, False])
 
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
diff --git a/model_zoo/official/cv/lenet_quant/train_quant.py b/model_zoo/official/cv/lenet_quant/train_quant.py
index 51d37cc1bfa..9d5462c37e7 100644
--- a/model_zoo/official/cv/lenet_quant/train_quant.py
+++ b/model_zoo/official/cv/lenet_quant/train_quant.py
@@ -62,7 +62,7 @@ if __name__ == "__main__":
                                            symmetric=[False, False])
 
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
diff --git a/model_zoo/official/cv/mobilenetv2/eval.py b/model_zoo/official/cv/mobilenetv2/eval.py
index 897e7ffe274..e4ac99013ca 100644
--- a/model_zoo/official/cv/mobilenetv2/eval.py
+++ b/model_zoo/official/cv/mobilenetv2/eval.py
@@ -51,8 +51,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
     if args_opt.device_target == "Ascend":
         net.to_float(mstype.float16)
diff --git a/model_zoo/official/cv/mobilenetv2/train.py b/model_zoo/official/cv/mobilenetv2/train.py
index 4fb800d6ddd..75255d90e06 100644
--- a/model_zoo/official/cv/mobilenetv2/train.py
+++ b/model_zoo/official/cv/mobilenetv2/train.py
@@ -172,7 +172,7 @@ if __name__ == '__main__':
             loss = CrossEntropyWithLabelSmooth(smooth_factor=config_gpu.label_smooth,
                                                num_classes=config_gpu.num_classes)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         # define dataset
         epoch_size = config_gpu.epoch_size
         dataset = create_dataset(dataset_path=args_opt.dataset_path,
@@ -236,8 +236,7 @@ if __name__ == '__main__':
             loss = CrossEntropyWithLabelSmooth(
                 smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(
-                is_grad=False, sparse=True, reduction='mean')
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                  do_train=True,
                                  config=config_ascend,
diff --git a/model_zoo/official/cv/mobilenetv2_quant/eval.py b/model_zoo/official/cv/mobilenetv2_quant/eval.py
index e6b0875c75f..d00fada259f 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/eval.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/eval.py
@@ -55,7 +55,7 @@ if __name__ == '__main__':
     # convert fusion network to quantization aware network
     network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
 
     # define network loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/mobilenetv2_quant/train.py b/model_zoo/official/cv/mobilenetv2_quant/train.py
index ebe60996cf9..5413619c850 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/train.py
+++ b/model_zoo/official/cv/mobilenetv2_quant/train.py
@@ -89,7 +89,7 @@ def train_on_ascend():
     if config.label_smooth > 0:
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                            num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
                              do_train=True,
@@ -150,7 +150,7 @@ def train_on_gpu():
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                            num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/mobilenetv3/eval.py b/model_zoo/official/cv/mobilenetv3/eval.py
index 43ebb1f21a7..d7e076490f2 100644
--- a/model_zoo/official/cv/mobilenetv3/eval.py
+++ b/model_zoo/official/cv/mobilenetv3/eval.py
@@ -41,8 +41,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net = mobilenet_v3_large(num_classes=config.num_classes)
 
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/mobilenetv3/train.py b/model_zoo/official/cv/mobilenetv3/train.py
index 60f3723244e..fc49582a574 100644
--- a/model_zoo/official/cv/mobilenetv3/train.py
+++ b/model_zoo/official/cv/mobilenetv3/train.py
@@ -162,8 +162,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(
             smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(
-            is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config_gpu.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
diff --git a/model_zoo/official/cv/resnet/eval.py b/model_zoo/official/cv/resnet/eval.py
index f7f0b593aea..570a26ee5cb 100755
--- a/model_zoo/official/cv/resnet/eval.py
+++ b/model_zoo/official/cv/resnet/eval.py
@@ -22,6 +22,7 @@ from mindspore import dataset as de
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from src.CrossEntropySmooth import CrossEntropySmooth
 
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -79,8 +80,8 @@ if __name__ == '__main__':
     if args_opt.dataset == "imagenet2012":
         if not config.use_label_smooth:
             config.label_smooth_factor = 0.0
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                             smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+        loss = CrossEntropySmooth(sparse=True, reduction='mean',
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     else:
         loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
diff --git a/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py b/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py
new file mode 100644
index 00000000000..bf38c6e77b0
--- /dev/null
+++ b/model_zoo/official/cv/resnet/src/CrossEntropySmooth.py
@@ -0,0 +1,38 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""define loss function for network"""
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
+
+
+class CrossEntropySmooth(_Loss):
+    """CrossEntropy"""
+    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
+        super(CrossEntropySmooth, self).__init__()
+        self.onehot = P.OneHot()
+        self.sparse = sparse
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)
+
+    def construct(self, logit, label):
+        if self.sparse:
+            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        loss = self.ce(logit, label)
+        return loss
diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py
index 0a891b91638..e7f231152cb 100755
--- a/model_zoo/official/cv/resnet/train.py
+++ b/model_zoo/official/cv/resnet/train.py
@@ -31,6 +31,7 @@ from mindspore.communication.management import init, get_rank, get_group_size
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from src.lr_generator import get_lr, warmup_cosine_annealing_lr
+from src.CrossEntropySmooth import CrossEntropySmooth
 
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -145,8 +146,8 @@ if __name__ == '__main__':
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                                 smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+            loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
         else:
             loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
@@ -157,11 +158,10 @@ if __name__ == '__main__':
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                                 smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+            loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                                 num_classes=config.class_num)
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         if args_opt.net == "resnet101" or args_opt.net == "resnet50":
             opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                            config.weight_decay,
diff --git a/model_zoo/official/cv/vgg16/eval.py b/model_zoo/official/cv/vgg16/eval.py
index e0e9fd1fd0b..be9e6cbe123 100644
--- a/model_zoo/official/cv/vgg16/eval.py
+++ b/model_zoo/official/cv/vgg16/eval.py
@@ -134,7 +134,7 @@ def test(cloud_args=None):
     net = vgg16(num_classes=args.num_classes, args=args)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, args.momentum,
                    weight_decay=args.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
 
     param_dict = load_checkpoint(args.pre_trained)
diff --git a/model_zoo/official/cv/vgg16/train.py b/model_zoo/official/cv/vgg16/train.py
index ae2f934e1e7..aeae4584330 100644
--- a/model_zoo/official/cv/vgg16/train.py
+++ b/model_zoo/official/cv/vgg16/train.py
@@ -210,7 +210,7 @@ if __name__ == '__main__':
                    loss_scale=args.loss_scale)
 
     if args.dataset == "cifar10":
-        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'},
                       amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
     else:
diff --git a/model_zoo/official/nlp/lstm/eval.py b/model_zoo/official/nlp/lstm/eval.py
index 6d731fbd0df..8bb139c65ca 100644
--- a/model_zoo/official/nlp/lstm/eval.py
+++ b/model_zoo/official/nlp/lstm/eval.py
@@ -64,7 +64,7 @@ if __name__ == '__main__':
                            weight=Tensor(embedding_table),
                            batch_size=cfg.batch_size)
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()
diff --git a/model_zoo/official/nlp/lstm/train.py b/model_zoo/official/nlp/lstm/train.py
index 53c3a89a6a3..7fa625db04d 100644
--- a/model_zoo/official/nlp/lstm/train.py
+++ b/model_zoo/official/nlp/lstm/train.py
@@ -70,7 +70,7 @@ if __name__ == '__main__':
     if args.pre_trained:
         load_param_into_net(network, load_checkpoint(args.pre_trained))
 
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()
diff --git a/tests/st/fusion/test_conv_bn1_fusion.py b/tests/st/fusion/test_conv_bn1_fusion.py
index 905179ee30c..51d1fac71b4 100644
--- a/tests/st/fusion/test_conv_bn1_fusion.py
+++ b/tests/st/fusion/test_conv_bn1_fusion.py
@@ -39,7 +39,7 @@ class MsWrapper(nn.Cell):
 
 
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                       filter(lambda x: x.requires_grad, net.get_parameters()))
     context.set_context(mode=context.GRAPH_MODE)
diff --git a/tests/st/host_device/test_host_device_lenet.py b/tests/st/host_device/test_host_device_lenet.py
index 0a312a34221..80bf7b578a4 100644
--- a/tests/st/host_device/test_host_device_lenet.py
+++ b/tests/st/host_device/test_host_device_lenet.py
@@ -66,7 +66,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py
index 37fd6363c06..3d7dada980d 100644
--- a/tests/st/nccl/test_nccl_lenet.py
+++ b/tests/st/nccl/test_nccl_lenet.py
@@ -85,7 +85,7 @@ def test_lenet_nccl():
     learning_rate = multisteplr(epoch, 2)
     momentum = 0.9
     mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, mom_optimizer)
     train_network.set_train()
diff --git a/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py b/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py
new file mode 100644
index 00000000000..bf38c6e77b0
--- /dev/null
+++ b/tests/st/networks/models/resnet50/src/CrossEntropySmooth.py
@@ -0,0 +1,38 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""define loss function for network"""
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
+
+
+class CrossEntropySmooth(_Loss):
+    """CrossEntropy"""
+    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
+        super(CrossEntropySmooth, self).__init__()
+        self.onehot = P.OneHot()
+        self.sparse = sparse
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)
+
+    def construct(self, logit, label):
+        if self.sparse:
+            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        loss = self.ce(logit, label)
+        return loss
diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
index 220b9862085..26236a289a5 100644
--- a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
+++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py
@@ -35,12 +35,12 @@ from tests.st.networks.models.resnet50.src.dataset import create_dataset
 from tests.st.networks.models.resnet50.src.lr_generator import get_learning_rate
 from tests.st.networks.models.resnet50.src.config import config
 from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell
+from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
 from tests.st.networks.models.resnet50.src_thor.config import config as thor_config
 from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model
 from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor
 from tests.st.networks.models.resnet50.src_thor.thor import THOR
 
-
 MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_1.json"
 MINDSPORE_HCCL_CONFIG_PATH_2 = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_2.json"
 dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train"
@@ -150,8 +150,8 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl):
         config.label_smooth_factor = 0.0
 
     # loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
-                                            num_classes=config.class_num)
+    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
+                              num_classes=config.class_num)
 
     # train dataset
     dataset = create_dataset(dataset_path=dataset_path, do_train=True,
@@ -259,9 +259,8 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
         thor_config.label_smooth_factor = 0.0
 
     # loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                            smooth_factor=thor_config.label_smooth_factor,
-                                            num_classes=thor_config.class_num)
+    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=thor_config.label_smooth_factor,
+                              num_classes=thor_config.class_num)
 
     # train dataset
     dataset = create_dataset(dataset_path=dataset_path, do_train=True,
diff --git a/tests/st/networks/test_cpu_lenet.py b/tests/st/networks/test_cpu_lenet.py
index 9a11b23c87a..6d25e6a4713 100644
--- a/tests/st/networks/test_cpu_lenet.py
+++ b/tests/st/networks/test_cpu_lenet.py
@@ -60,7 +60,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/networks/test_gpu_alexnet.py b/tests/st/networks/test_gpu_alexnet.py
index 7a55006571e..4ade9de314c 100644
--- a/tests/st/networks/test_gpu_alexnet.py
+++ b/tests/st/networks/test_gpu_alexnet.py
@@ -78,7 +78,7 @@ def test_trainTensor(num_classes=10, epoch=15, batch_size=32):
     lr = 0.1
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum, weight_decay=0.0001)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)
     train_network.set_train()
diff --git a/tests/st/networks/test_gpu_lenet.py b/tests/st/networks/test_gpu_lenet.py
index 4677c7ad008..ad77a691707 100644
--- a/tests/st/networks/test_gpu_lenet.py
+++ b/tests/st/networks/test_gpu_lenet.py
@@ -136,7 +136,7 @@ def test_train_lenet():
     learning_rate = multisteplr(epoch, 30)
 
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
@@ -192,7 +192,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
 def test_train_and_eval_lenet():
     context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
     network = LeNet5(10)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
diff --git a/tests/st/networks/test_gpu_lstm.py b/tests/st/networks/test_gpu_lstm.py
index bc59b7e3872..5604d9dd3dc 100644
--- a/tests/st/networks/test_gpu_lstm.py
+++ b/tests/st/networks/test_gpu_lstm.py
@@ -130,7 +130,7 @@ def test_LSTM():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/networks/test_gpu_resnet.py b/tests/st/networks/test_gpu_resnet.py
index d440c5cacba..8444bd55c40 100644
--- a/tests/st/networks/test_gpu_resnet.py
+++ b/tests/st/networks/test_gpu_resnet.py
@@ -337,7 +337,7 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)  # optimizer
@@ -361,7 +361,7 @@ def test_trainTensor_big_batchSize(num_classes=10, epoch=8, batch_size=338):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)  # optimizer
@@ -385,7 +385,7 @@ def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     train_network = amp.build_train_network(
         net, optimizer, criterion, level="O2")
     train_network.set_train()
diff --git a/tests/st/networks/test_network_main.py b/tests/st/networks/test_network_main.py
index a05798bfbec..1a8fed1fc11 100644
--- a/tests/st/networks/test_network_main.py
+++ b/tests/st/networks/test_network_main.py
@@ -39,7 +39,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/cpu/test_momentum_op.py b/tests/st/ops/cpu/test_momentum_op.py
index 717925c23e6..b35ec5da4ed 100644
--- a/tests/st/ops/cpu/test_momentum_op.py
+++ b/tests/st/ops/cpu/test_momentum_op.py
@@ -52,7 +52,7 @@ def test_momentum():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/gpu/test_adam_op.py b/tests/st/ops/gpu/test_adam_op.py
index 6e2bb0ddab3..8c2e16e6386 100644
--- a/tests/st/ops/gpu/test_adam_op.py
+++ b/tests/st/ops/gpu/test_adam_op.py
@@ -49,7 +49,7 @@ def test_adam():
     net = NetAdam()
     optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.01)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)
diff --git a/tests/st/ops/gpu/test_ftrl_op.py b/tests/st/ops/gpu/test_ftrl_op.py
index 55d5972c20f..e9518f7762f 100644
--- a/tests/st/ops/gpu/test_ftrl_op.py
+++ b/tests/st/ops/gpu/test_ftrl_op.py
@@ -49,7 +49,7 @@ def test_ftrl():
     net = NetFtrl()
     optimizer = FTRL(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.01)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)
diff --git a/tests/st/ops/gpu/test_momentum_op.py b/tests/st/ops/gpu/test_momentum_op.py
index 48b1ed3380f..51ec0ffc7aa 100644
--- a/tests/st/ops/gpu/test_momentum_op.py
+++ b/tests/st/ops/gpu/test_momentum_op.py
@@ -52,7 +52,7 @@ def test_momentum():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/gpu/test_sgd_op.py b/tests/st/ops/gpu/test_sgd_op.py
index 85d470f50da..f959d879cb2 100644
--- a/tests/st/ops/gpu/test_sgd_op.py
+++ b/tests/st/ops/gpu/test_sgd_op.py
@@ -55,7 +55,7 @@ def test_SGD():
     optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum, dampening,
                     weight_decay, nesterov, loss_scale)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
diff --git a/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py b/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py
index d18eeeb0ad5..c677d8c79f3 100644
--- a/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py
+++ b/tests/st/ops/gpu/test_sparse_softmax_cross_entropy_with_logits_op.py
@@ -20,15 +20,13 @@ import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor
 
-
 class NetSparseSoftmaxCrossEntropyWithLogits(nn.Cell):
     def __init__(self):
         super(NetSparseSoftmaxCrossEntropyWithLogits, self).__init__()
-        self.loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
-        self.dlogits = nn.SoftmaxCrossEntropyWithLogits(is_grad=True, sparse=True)
+        self.loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
 
     def construct(self, logits, labels):
-        return (self.loss(logits, labels), self.dlogits(logits, labels))
+        return self.loss(logits, labels)
 
 
 @pytest.mark.level0
@@ -39,29 +37,18 @@ def test_sparse_softmax_cross_entropy_with_logits():
                              [1, 10, 1],
                              [10, 1, 1]]).astype(np.float32))
     labels = Tensor(np.array([2, 1, 0]).astype(np.int32))
-    expect_loss = 0.0002467
-    expect_dlogits = np.array([[4.1126452e-05, 4.1126452e-05, -8.2234539e-05],
-                               [4.1126452e-05, -8.2234539e-05, 4.1126452e-05],
-                               [-8.2234539e-05, 4.1126452e-05, 4.1126452e-05]]).astype(np.float32)
+    expect_loss = [0.00024673, 0.00024673, 0.00024673]
 
     context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
     sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits()
     output = sparse_softmax_cross_entropy_with_logits(logits, labels)
     error0 = 1.0e-6
-    diff0 = output[0].asnumpy() - expect_loss
+    diff0 = output.asnumpy() - expect_loss
     assert np.all(abs(diff0) < error0)
 
-    error1 = np.ones(shape=[3, 3]) * 1.0e-6
-    diff1 = output[1].asnumpy() - expect_dlogits
-    assert np.all(abs(diff1) < error1)
-
     context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
     sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits()
     output = sparse_softmax_cross_entropy_with_logits(logits, labels)
     error0 = 1.0e-6
-    diff0 = output[0].asnumpy() - expect_loss
+    diff0 = output.asnumpy() - expect_loss
     assert np.all(abs(diff0) < error0)
-
-    error1 = np.ones(shape=[3, 3]) * 1.0e-6
-    diff1 = output[1].asnumpy() - expect_dlogits
-    assert np.all(abs(diff1) < error1)
diff --git a/tests/st/probability/test_bnn_layer.py b/tests/st/probability/test_bnn_layer.py
index 742b17c2688..cdc16908c30 100644
--- a/tests/st/probability/test_bnn_layer.py
+++ b/tests/st/probability/test_bnn_layer.py
@@ -124,7 +124,7 @@ def validate_model(net, dataset):
 
 if __name__ == "__main__":
     network = BNNLeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
 
     net_with_loss = bnn_layers.WithBNNLossCell(network, criterion, 60000, 0.000001)
diff --git a/tests/st/probability/test_transform_bnn_layer.py b/tests/st/probability/test_transform_bnn_layer.py
index 3fd4bfd4001..52f0edffa78 100644
--- a/tests/st/probability/test_transform_bnn_layer.py
+++ b/tests/st/probability/test_transform_bnn_layer.py
@@ -125,7 +125,7 @@ def validate_model(net, dataset):
 
 if __name__ == "__main__":
     network = LeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
 
     net_with_loss = WithLossCell(network, criterion)
diff --git a/tests/st/probability/test_transform_bnn_model.py b/tests/st/probability/test_transform_bnn_model.py
index 5cc7733e891..008802b3d5e 100644
--- a/tests/st/probability/test_transform_bnn_model.py
+++ b/tests/st/probability/test_transform_bnn_model.py
@@ -124,7 +124,7 @@ def validate_model(net, dataset):
 
 if __name__ == "__main__":
     network = LeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
 
     net_with_loss = WithLossCell(network, criterion)
diff --git a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
index a596e13c0f3..aecf8d781d5 100644
--- a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
+++ b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
@@ -73,9 +73,7 @@ def do_sparse_embedding(ps=False):
     optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()))
     optimizer.sparse_opt.add_prim_attr("primitive_target", "CPU")
-    criterion = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction="mean"
-    )
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)
     train_network.set_train()
diff --git a/tests/st/ps/full_ps/test_full_ps_lenet.py b/tests/st/ps/full_ps/test_full_ps_lenet.py
index fbf48e5fb86..aca875f6fcc 100644
--- a/tests/st/ps/full_ps/test_full_ps_lenet.py
+++ b/tests/st/ps/full_ps/test_full_ps_lenet.py
@@ -123,7 +123,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
 if __name__ == "__main__":
     network = LeNet5(10)
     network.set_param_ps()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
diff --git a/tests/st/ps/multi_full_ps/test_multi_full_ps.py b/tests/st/ps/multi_full_ps/test_multi_full_ps.py
index 30bf6176923..f53063a9a63 100644
--- a/tests/st/ps/multi_full_ps/test_multi_full_ps.py
+++ b/tests/st/ps/multi_full_ps/test_multi_full_ps.py
@@ -94,9 +94,7 @@ if __name__ == "__main__":
     np.random.seed(0)
     network = LeNet5(10)
     network.set_param_ps()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction="mean"
-    )
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     if device_target == "GPU":
         context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True,
                                           device_num=get_group_size())
diff --git a/tests/st/pynative/test_pynative_hook.py b/tests/st/pynative/test_pynative_hook.py
index 99688697aef..e5cc6240144 100644
--- a/tests/st/pynative/test_pynative_hook.py
+++ b/tests/st/pynative/test_pynative_hook.py
@@ -159,7 +159,7 @@ def test_pynative_lenet_train_hook_function_print_and_save_grad():
                           cell_hook_function_print_grad)
     net = LeNet5(hook_function=function[0], cell_hook_function=function[1])
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = GradWrap(net_with_criterion)
     train_network.set_train()
diff --git a/tests/st/pynative/test_pynative_mindarmour.py b/tests/st/pynative/test_pynative_mindarmour.py
index 23e7b2d042d..dc52506dc8c 100644
--- a/tests/st/pynative/test_pynative_mindarmour.py
+++ b/tests/st/pynative/test_pynative_mindarmour.py
@@ -145,14 +145,14 @@ def test_multi_grads():
     net = LeNet()
 
     # grad operation
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     with_loss_cell = WithLossCell(net, loss_fn)
     grad_all = GradWrapWithLoss(with_loss_cell)
     grad_out = grad_all(Tensor(inputs_np), Tensor(labels_np)).asnumpy()
     assert np.any(grad_out != 0), 'grad result can not be all zeros'
 
     # train-one-step operation
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          0.01, 0.9)
     loss_net = WithLossCell(net, loss_fn)
diff --git a/tests/st/quantization/lenet_quant/test_lenet_quant.py b/tests/st/quantization/lenet_quant/test_lenet_quant.py
index 361aa1abf62..1d1e8fb94a5 100644
--- a/tests/st/quantization/lenet_quant/test_lenet_quant.py
+++ b/tests/st/quantization/lenet_quant/test_lenet_quant.py
@@ -42,7 +42,7 @@ def train_lenet():
                              cfg.batch_size)
 
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
@@ -74,7 +74,7 @@ def train_lenet_quant():
                                            symmetric=[False, False])
 
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
@@ -104,7 +104,7 @@ def eval_quant():
                                            per_channel=[True, False])
 
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
 
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
diff --git a/tests/st/summary/test_summary.py b/tests/st/summary/test_summary.py
index b81d15514af..7aa5d95358d 100644
--- a/tests/st/summary/test_summary.py
+++ b/tests/st/summary/test_summary.py
@@ -154,7 +154,7 @@ class TestSummary:
 
     def _run_network(self, dataset_sink_mode=True):
         lenet = LeNet5()
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
         model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()})
         summary_dir = tempfile.mkdtemp(dir=self.base_summary_dir)
diff --git a/tests/ut/python/exec/test_train.py b/tests/ut/python/exec/test_train.py
index 2cd9b9cad47..618ad3c0341 100644
--- a/tests/ut/python/exec/test_train.py
+++ b/tests/ut/python/exec/test_train.py
@@ -31,7 +31,7 @@ def lr_gen(fn, epoch_size):
 
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
     """me_train_tensor"""
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr_gen(lambda i: 0.1, epoch_size),
                    0.9, 0.01, 1024)
     Model(net, loss, opt)
diff --git a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py
index b09584f2989..04087cb0f0a 100644
--- a/tests/ut/python/exec/test_train_with_lars.py
+++ b/tests/ut/python/exec/test_train_with_lars.py
@@ -78,7 +78,7 @@ def lr_gen(fn, epoch_size):
 
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
     """me_train_tensor"""
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # reorder the net parameters , leave the parameters that need to be passed into lars to the end part
     opt = Momentum(get_net_trainable_reordered_params(net)[2], lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py
index c93df7ffb1b..3ce0b274b60 100644
--- a/tests/ut/python/parallel/test_allreduce_fusion.py
+++ b/tests/ut/python/parallel/test_allreduce_fusion.py
@@ -113,7 +113,7 @@ def train_common(net):
     label = Tensor(np.ones([batch_size]), dtype=ms.int32)
     dataset = Dataset(predict, label, 2)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
diff --git a/tests/ut/python/parallel/test_alltoall.py b/tests/ut/python/parallel/test_alltoall.py
index 96ff8435046..4f1794484d1 100644
--- a/tests/ut/python/parallel/test_alltoall.py
+++ b/tests/ut/python/parallel/test_alltoall.py
@@ -78,7 +78,7 @@ def all_to_all_common(strategy1):
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net(strategy1)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     loss.one_hot.set_strategy(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py
index 21d5003b4c0..32e597d8873 100644
--- a/tests/ut/python/parallel/test_batchnorm_batch_parallel.py
+++ b/tests/ut/python/parallel/test_batchnorm_batch_parallel.py
@@ -133,7 +133,7 @@ def test_batchnorm_batch_parallel():
     dataset = DatasetLenet(predict, label, 2)
     net = batchnorm_net(num_classes)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_bn_prelu_cell.py b/tests/ut/python/parallel/test_bn_prelu_cell.py
index 07f5d3906be..354add8511b 100644
--- a/tests/ut/python/parallel/test_bn_prelu_cell.py
+++ b/tests/ut/python/parallel/test_bn_prelu_cell.py
@@ -209,7 +209,7 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None):
     dataset = Dataset(predict, label, 2)
     net = bn_net()
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(strategy_loss)
     opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size)
diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py
index 0e70b2513c2..baab850b84a 100644
--- a/tests/ut/python/parallel/test_dataset_interface.py
+++ b/tests/ut/python/parallel/test_dataset_interface.py
@@ -79,7 +79,7 @@ def loss_scale_manager_common(strategy1):
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net(strategy1)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     scale_manager = DynamicLossScaleManager(32, 2, 2000)
diff --git a/tests/ut/python/parallel/test_full_batch.py b/tests/ut/python/parallel/test_full_batch.py
index 70a68a5b00c..68e77f04608 100644
--- a/tests/ut/python/parallel/test_full_batch.py
+++ b/tests/ut/python/parallel/test_full_batch.py
@@ -75,7 +75,7 @@ def all_to_all_common(strategy1):
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net(strategy1)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     loss.one_hot.set_strategy(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_one_dev.py b/tests/ut/python/parallel/test_one_dev.py
index 056f4a15c75..7f10e3dc6ae 100644
--- a/tests/ut/python/parallel/test_one_dev.py
+++ b/tests/ut/python/parallel/test_one_dev.py
@@ -81,7 +81,7 @@ def all_to_all_common():
     dataset = Dataset(predict, label, 2)
     net = all_to_all_net()
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
diff --git a/tests/ut/python/parallel/test_operator_model_parallel.py b/tests/ut/python/parallel/test_operator_model_parallel.py
index 788521c5258..8cbfa14a91d 100644
--- a/tests/ut/python/parallel/test_operator_model_parallel.py
+++ b/tests/ut/python/parallel/test_operator_model_parallel.py
@@ -361,7 +361,7 @@ def test_resnet_operator_batch_parallel():
     dataset = DatasetLenet(predict, label, 2)
     net = resnet_operator_net(num_classes)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
@@ -386,7 +386,7 @@ def test_resnet_model_parallel():
     dataset = DatasetLenet(predict, label, 2)
     net = resnet_model_parallel_net(num_classes)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_prelu_cell.py b/tests/ut/python/parallel/test_prelu_cell.py
index dca467ef8df..59d14359c19 100644
--- a/tests/ut/python/parallel/test_prelu_cell.py
+++ b/tests/ut/python/parallel/test_prelu_cell.py
@@ -107,7 +107,7 @@ def reshape_common(parallel_mode):
     dataset = Dataset(predict, label, 2)
     net = prelu_net()
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     model = Model(net, loss, opt)
     model.train(epoch_size, dataset, dataset_sink_mode=False)
diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py
index 9cfb376e1b6..28a1a27f3f0 100644
--- a/tests/ut/python/parallel/test_reshape.py
+++ b/tests/ut/python/parallel/test_reshape.py
@@ -94,7 +94,7 @@ def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss
     dataset = Dataset(predict, label, 2)
     net = reshape_net(strategy0, strategy1, strategy2)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(strategy_loss)
     loss.one_hot.set_strategy(((8, 1), (), ()))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
diff --git a/tests/ut/python/parallel/test_transpose.py b/tests/ut/python/parallel/test_transpose.py
index b0b917bf191..300b5dae2e2 100644
--- a/tests/ut/python/parallel/test_transpose.py
+++ b/tests/ut/python/parallel/test_transpose.py
@@ -79,7 +79,7 @@ def transpose_common(strategy1, strategy2):
     dataset = Dataset(predict, label, 2)
     net = transpose_net(strategy1, strategy2)
 
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
     opt = Momentum(net.trainable_params(), learning_rate, momentum)
     context.set_context(mode=context.GRAPH_MODE)
diff --git a/tests/ut/python/pynative_mode/test_hook.py b/tests/ut/python/pynative_mode/test_hook.py
index 6c2204f3810..a138e6b098c 100644
--- a/tests/ut/python/pynative_mode/test_hook.py
+++ b/tests/ut/python/pynative_mode/test_hook.py
@@ -141,7 +141,7 @@ class GradWrap(nn.Cell):
 def test_hook():
     net = LeNet5()
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = GradWrap(net_with_criterion)
     train_network.set_train()
diff --git a/tests/ut/python/pynative_mode/test_pynative_model.py b/tests/ut/python/pynative_mode/test_pynative_model.py
index a0469cdaf4b..ea40227e554 100644
--- a/tests/ut/python/pynative_mode/test_pynative_model.py
+++ b/tests/ut/python/pynative_mode/test_pynative_model.py
@@ -129,7 +129,7 @@ def test_lenet_grad():
     verification_step = 0
 
     net = LeNet5()
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits()
     momen_opti = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
     train_net = GradWrap(NetWithLossClass(net))
     train_net.set_train()
diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py
index dae05e98302..56c18508488 100644
--- a/tests/ut/python/utils/test_serialize.py
+++ b/tests/ut/python/utils/test_serialize.py
@@ -282,7 +282,7 @@ def test_load_param_into_net():
 
 def test_exec_save_checkpoint():
     net = Net()
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = Momentum(net.trainable_params(), 0.0, 0.9, 0.0001, 1024)
 
     loss_net = WithLossCell(net, loss)
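
Note: label smoothing now lives in the CrossEntropySmooth cell added under
src/ for the resnet model and the resnet50 test model. A minimal usage
sketch; the smooth_factor and num_classes values below are illustrative,
not taken from any config in this patch:

    from src.CrossEntropySmooth import CrossEntropySmooth

    # puts 1 - smooth_factor on the true class and
    # smooth_factor / (num_classes - 1) on the rest via OneHot on/off values
    loss = CrossEntropySmooth(sparse=True, reduction="mean",
                              smooth_factor=0.1, num_classes=1000)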