diff --git a/model_zoo/official/cv/centerface/src/centerface.py b/model_zoo/official/cv/centerface/src/centerface.py index aae19169a39..b023ada4c6c 100644 --- a/model_zoo/official/cv/centerface/src/centerface.py +++ b/model_zoo/official/cv/centerface/src/centerface.py @@ -310,8 +310,8 @@ class TrainingWrapper(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) - ret = (loss, cond, sens) - return F.depend(ret, self.optimizer(grads)) + self.optimizer(grads) + return (loss, cond, sens) class CenterFaceWithNms(nn.Cell): diff --git a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py index 3e46d30db0f..60af01aae9f 100644 --- a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py +++ b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py @@ -135,10 +135,8 @@ class CNNCTCTrainOneStepWithLossScaleCell(nn.Cell): #apply grad reducer on grads grads = self.grad_reducer(grads) - success = self.optimizer(grads) - - ret = (loss, scaling_sens) - return F.depend(ret, success) + self.optimizer(grads) + return (loss, scaling_sens) class CNNCTC_Model(nn.Cell): diff --git a/model_zoo/official/cv/crnn/src/crnn_for_train.py b/model_zoo/official/cv/crnn/src/crnn_for_train.py index fad288c36f4..90a3d83e659 100644 --- a/model_zoo/official/cv/crnn/src/crnn_for_train.py +++ b/model_zoo/official/cv/crnn/src/crnn_for_train.py @@ -108,4 +108,5 @@ class TrainOneStepCellWithGradClip(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py index 172867b4b1b..1871eb65f58 100755 --- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py +++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py @@ -184,4 +184,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/ctpn/src/network_define.py b/model_zoo/official/cv/ctpn/src/network_define.py index e1458bdbac0..c95fbabdaf6 100644 --- a/model_zoo/official/cv/ctpn/src/network_define.py +++ b/model_zoo/official/cv/ctpn/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -140,4 +139,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, gt_bbox, gt_label, gt_num, img_shape, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/deeptext/src/network_define.py b/model_zoo/official/cv/deeptext/src/network_define.py index 2fcd9bb6c44..0895741001b 100644 --- a/model_zoo/official/cv/deeptext/src/network_define.py +++ b/model_zoo/official/cv/deeptext/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -150,4 +149,5 @@ class 
TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/faster_rcnn/src/network_define.py b/model_zoo/official/cv/faster_rcnn/src/network_define.py index 531cd32c6e5..4219667f84e 100644 --- a/model_zoo/official/cv/faster_rcnn/src/network_define.py +++ b/model_zoo/official/cv/faster_rcnn/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -147,4 +146,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/maskrcnn/src/network_define.py b/model_zoo/official/cv/maskrcnn/src/network_define.py index 662cd99cefb..2269c23db49 100644 --- a/model_zoo/official/cv/maskrcnn/src/network_define.py +++ b/model_zoo/official/cv/maskrcnn/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -146,5 +145,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py index 7825a19ebcc..4c5b4a89b45 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py @@ -177,7 +177,8 @@ class TrainOneStepCell(nn.Cell): if self.reduce_flag: grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class MaskRcnn_Mobilenetv1_Infer(nn.Cell): def __init__(self, config): diff --git a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py index f54dc4edeed..39787b928a0 100755 --- a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py +++ b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py @@ -934,4 +934,5 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/openpose/src/loss.py b/model_zoo/official/cv/openpose/src/loss.py index 943b033279f..312dba9a633 100644 --- a/model_zoo/official/cv/openpose/src/loss.py +++ b/model_zoo/official/cv/openpose/src/loss.py @@ -199,4 +199,5 @@ class TrainOneStepWithClipGradientCell(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = 
self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/psenet/src/network_define.py b/model_zoo/official/cv/psenet/src/network_define.py index 09ffe610209..3f55a996903 100644 --- a/model_zoo/official/cv/psenet/src/network_define.py +++ b/model_zoo/official/cv/psenet/src/network_define.py @@ -23,7 +23,6 @@ from mindspore import ParameterTuple from mindspore.common.tensor import Tensor from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.train.callback import Callback __all__ = ['LossCallBack', 'WithLossCell', 'TrainOneStepCell'] @@ -144,4 +143,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(img, gt_text, gt_kernels, training_mask, self.sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/retinaface_resnet50/src/network.py b/model_zoo/official/cv/retinaface_resnet50/src/network.py index 337a4e9acac..3be88a8da28 100644 --- a/model_zoo/official/cv/retinaface_resnet50/src/network.py +++ b/model_zoo/official/cv/retinaface_resnet50/src/network.py @@ -19,7 +19,6 @@ import numpy as np import mindspore import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore import context, Tensor @@ -524,4 +523,5 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/retinanet/src/retinanet.py b/model_zoo/official/cv/retinanet/src/retinanet.py index 6e9c4f312b6..58557d8dbd8 100644 --- a/model_zoo/official/cv/retinanet/src/retinanet.py +++ b/model_zoo/official/cv/retinanet/src/retinanet.py @@ -316,7 +316,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class resnet(nn.Cell): """ diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py index 7108240ffc5..171c9178054 100644 --- a/model_zoo/official/cv/ssd/src/ssd.py +++ b/model_zoo/official/cv/ssd/src/ssd.py @@ -525,7 +525,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SSDWithMobileNetV2(nn.Cell): diff --git a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py index bc261c01a7e..82671e15e92 100755 --- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py +++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py @@ -105,4 +105,5 @@ class TrainOneStepCellWithGradClip(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py index b5cee676427..bd49548c69c 100644 --- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py +++ 
b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py @@ -444,4 +444,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py index 81a77d855f2..4e9747be0b8 100644 --- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py +++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py @@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py index f1bfbe14550..91ac4081e4b 100644 --- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py +++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py @@ -672,7 +672,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class YoloBoxScores(nn.Cell): diff --git a/model_zoo/official/cv/yolov4/src/yolo.py b/model_zoo/official/cv/yolov4/src/yolo.py index e122b69ae1e..074016abeba 100644 --- a/model_zoo/official/cv/yolov4/src/yolo.py +++ b/model_zoo/official/cv/yolov4/src/yolo.py @@ -515,7 +515,8 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class Giou(nn.Cell): diff --git a/model_zoo/official/cv/yolov5/src/yolo.py b/model_zoo/official/cv/yolov5/src/yolo.py index c881fd6ce00..c514fb81c28 100644 --- a/model_zoo/official/cv/yolov5/src/yolo.py +++ b/model_zoo/official/cv/yolov5/src/yolo.py @@ -427,7 +427,8 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class Giou(nn.Cell): diff --git a/model_zoo/official/gnn/gat/src/utils.py b/model_zoo/official/gnn/gat/src/utils.py index c7bae8c8b86..441ef7c48ee 100644 --- a/model_zoo/official/gnn/gat/src/utils.py +++ b/model_zoo/official/gnn/gat/src/utils.py @@ -18,7 +18,6 @@ from mindspore.common.parameter import ParameterTuple from mindspore import Tensor from mindspore.common import dtype as mstype from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P @@ -150,7 +149,8 @@ class TrainOneStepCell(nn.Cell): loss = self.network(feature, biases) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(feature, biases, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class TrainGAT(nn.Cell): diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py index 210339ccd01..b59f310cbd7 100644 --- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py @@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = 
self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertSquadCell(nn.Cell): """ @@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py index 36fca77faef..433ef03c99b 100644 --- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py @@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): if self.enable_clip_grad: grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - succ = self.optimizer(grads, overflow) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + self.optimizer(grads, overflow) + return (loss, cond, scaling_sens) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py index 58770011b75..6b845d28da5 100644 --- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py @@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): if 
self.enable_clip_grad: grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - succ = self.optimizer(grads, overflow) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + self.optimizer(grads, overflow) + return (loss, cond, scaling_sens) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/cpm/src/cpm_train.py b/model_zoo/official/nlp/cpm/src/cpm_train.py index 3087c3979a0..8c50abe4024 100644 --- a/model_zoo/official/nlp/cpm/src/cpm_train.py +++ b/model_zoo/official/nlp/cpm/src/cpm_train.py @@ -254,11 +254,9 @@ class CPMTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell): cond = self.get_overflow_status(status, grads) overflow = self.process_loss_scale(cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - return F.depend(loss, succ), cond, scaling_sens + if not overflow: + self.optimizer(grads) + return loss, cond, scaling_sens cast = P.Cast() @@ -352,7 +350,6 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell): accu_overflow = self.select(overflow, self.one, self.zero) if self.accumulation: - succ = False self.accu_overflow = accu_overflow else: my_zero = F.depend(self.zero, accu_overflow) @@ -378,9 +375,7 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell): overflow = self.reshape(overflow, (())) overflow = self.process_loss_scale(overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - return F.depend(loss, succ), overflow, scaling_sens + return loss, overflow, scaling_sens diff --git a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py 
b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py index 16a8da5043b..265a6bb7584 100644 --- a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py @@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertSquadCell(nn.Cell): """ @@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py index c99c9318f4e..e75e928c97c 100644 --- a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py @@ -308,8 +308,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): mstype.float32)) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -397,12 +397,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -472,9 +469,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - succ = self.optimizer(grads, overflow) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + self.optimizer(grads, overflow) + return (loss, cond, scaling_sens) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -631,9 +627,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -650,13 +644,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py 
b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py index a951bc65eb7..93b6010517f 100755 --- a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py +++ b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py @@ -172,12 +172,9 @@ class ErnieFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class ErnieCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/fasttext/src/fasttext_train.py b/model_zoo/official/nlp/fasttext/src/fasttext_train.py index 86c0d6fbf04..cddd78227f0 100644 --- a/model_zoo/official/nlp/fasttext/src/fasttext_train.py +++ b/model_zoo/official/nlp/fasttext/src/fasttext_train.py @@ -138,5 +138,5 @@ class FastTextTrainOneStepCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py index 76d5aa0502f..2ec0b80a033 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py +++ b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py @@ -284,9 +284,6 @@ class GNMTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) diff --git a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py index b995daf283f..615c728f061 100644 --- a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py +++ b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py @@ -151,9 +151,6 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) diff --git a/model_zoo/official/nlp/gru/src/gru_for_train.py b/model_zoo/official/nlp/gru/src/gru_for_train.py index 50e028dafab..647eed4d101 100644 --- a/model_zoo/official/nlp/gru/src/gru_for_train.py +++ b/model_zoo/official/nlp/gru/src/gru_for_train.py @@ -234,12 +234,9 @@ class GRUTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class GRUTrainOneStepCell(nn.TrainOneStepCell): """ diff --git a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py index 23ff47d1a14..2164e17c1dc 100644 --- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py +++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py @@ -368,10 +368,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond 
if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + return (loss, cond, scaling_sens) diff --git a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py index 4ea05370aa2..92d4100ea8a 100644 --- a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py +++ b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py @@ -147,11 +147,9 @@ class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell): overflow = self.process_loss_scale(cond) # If overflow, surpass weights update # if not, update weights - if overflow: - succ = False - else: - succ = self.optimizer(grads) - return F.depend(loss, succ), cond, scaling_sens + if not overflow: + self.optimizer(grads) + return loss, cond, scaling_sens class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell): """ @@ -255,9 +253,6 @@ class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, overflow, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, overflow, scaling_sens) diff --git a/model_zoo/official/nlp/q8bert/src/q8bert.py b/model_zoo/official/nlp/q8bert/src/q8bert.py index c6549b30f84..e752e5d97ed 100644 --- a/model_zoo/official/nlp/q8bert/src/q8bert.py +++ b/model_zoo/official/nlp/q8bert/src/q8bert.py @@ -212,12 +212,9 @@ class BertTrainWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainCell(nn.Cell): @@ -271,8 +268,8 @@ class BertTrainCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss class BertNetworkWithLoss_td(nn.Cell): @@ -451,12 +448,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertEvaluationCell(nn.Cell): @@ -507,5 +501,5 @@ class BertEvaluationCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py index 3b1468fd41d..c2e8f9f91a3 100644 --- a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py +++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py @@ -285,12 +285,9 @@ class BertTrainWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = 
self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainCell(nn.Cell): """ @@ -343,8 +340,8 @@ class BertTrainCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss class BertNetworkWithLoss_td(nn.Cell): """ @@ -551,12 +548,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertEvaluationCell(nn.Cell): @@ -606,5 +600,5 @@ class BertEvaluationCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/nlp/transformer/src/transformer_for_train.py b/model_zoo/official/nlp/transformer/src/transformer_for_train.py index 05555bf2df6..8fa2ce1a227 100644 --- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py +++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py @@ -187,8 +187,8 @@ class TransformerTrainOneStepCell(nn.TrainOneStepCell): grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) # apply grad reducer on grads grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -277,12 +277,9 @@ class TransformerTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell) overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) cast = P.Cast() @@ -444,9 +441,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -463,10 +458,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) diff --git a/model_zoo/official/recommend/ncf/src/ncf.py b/model_zoo/official/recommend/ncf/src/ncf.py index 6a9bb21059f..c48af973ca7 100644 --- a/model_zoo/official/recommend/ncf/src/ncf.py +++ 
b/model_zoo/official/recommend/ncf/src/ncf.py @@ -20,7 +20,6 @@ from mindspore.nn.layer.activation import get_activation import mindspore.common.dtype as mstype from mindspore.ops import operations as P from mindspore.common.initializer import initializer -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.context import ParallelMode from mindspore.nn.wrap.grad_reducer import DistributedGradReducer @@ -261,7 +260,8 @@ class TrainStepWrap(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class PredictWithSigmoid(nn.Cell): diff --git a/model_zoo/research/cv/AVA_cifar/src/network_define.py b/model_zoo/research/cv/AVA_cifar/src/network_define.py index 8e102cd486a..132e7033b34 100644 --- a/model_zoo/research/cv/AVA_cifar/src/network_define.py +++ b/model_zoo/research/cv/AVA_cifar/src/network_define.py @@ -15,7 +15,6 @@ """define network""" import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.nn.wrap.grad_reducer import DistributedGradReducer @@ -83,4 +82,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data3, data2, data1, label) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py index 1084f084168..4ab7d928e6f 100644 --- a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py +++ b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py @@ -14,7 +14,6 @@ # ============================================================================ """define pretrain network""" import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import ParameterTuple @@ -85,4 +84,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data1, data2, data3, label) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py index d5e4ad32fba..01167b1c6d6 100644 --- a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py +++ b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py @@ -14,7 +14,6 @@ # ============================================================================ """define training network""" import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import ParameterTuple @@ -84,4 +83,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data, label) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/AttGAN/src/cell.py b/model_zoo/research/cv/AttGAN/src/cell.py index 5271048c6ea..ec8d9a2928d 100644 --- a/model_zoo/research/cv/AttGAN/src/cell.py +++ b/model_zoo/research/cv/AttGAN/src/cell.py @@ -116,7 +116,8 @@ class TrainOneStepCellGen(nn.Cell): grads = 
self.grad(self.network, weights)(img_a, att_a, att_a_, att_b, att_b_, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)), gf_loss, gc_loss, gr_loss + self.optimizer(grads) + return loss, gf_loss, gc_loss, gr_loss class TrainOneStepCellDis(nn.Cell): @@ -152,4 +153,5 @@ class TrainOneStepCellDis(nn.Cell): if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)), d_real_loss, d_fake_loss, dc_loss, df_gp + self.optimizer(grads) + return loss, d_real_loss, d_fake_loss, dc_loss, df_gp diff --git a/model_zoo/research/cv/FaceDetection/src/network_define.py b/model_zoo/research/cv/FaceDetection/src/network_define.py index 6a342119c43..0284586929a 100644 --- a/model_zoo/research/cv/FaceDetection/src/network_define.py +++ b/model_zoo/research/cv/FaceDetection/src/network_define.py @@ -138,10 +138,8 @@ class TrainOneStepWithLossScaleCell(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) - opt = self.optimizer(grads) - - ret = (loss, cond, scaling_sens) - return F.depend(ret, opt) + self.optimizer(grads) + return (loss, cond, scaling_sens) class BuildTrainNetworkV2(nn.Cell): diff --git a/model_zoo/research/cv/IPT/src/loss.py b/model_zoo/research/cv/IPT/src/loss.py index 30ae4ea9f85..11a3a986ae9 100644 --- a/model_zoo/research/cv/IPT/src/loss.py +++ b/model_zoo/research/cv/IPT/src/loss.py @@ -144,12 +144,9 @@ class IPTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class SupConLoss(nn.Cell): diff --git a/model_zoo/research/cv/IPT/src/utils.py b/model_zoo/research/cv/IPT/src/utils.py index 9928281a0c7..e2d77b0d887 100644 --- a/model_zoo/research/cv/IPT/src/utils.py +++ b/model_zoo/research/cv/IPT/src/utils.py @@ -23,7 +23,6 @@ from mindspore.common import dtype as mstype from mindspore.context import ParallelMode from mindspore.ops import operations as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.parallel._utils import _get_parallel_mode from mindspore.train.serialization import save_checkpoint @@ -82,7 +81,8 @@ class MyTrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def sub_mean(x): diff --git a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py index 12f118deb17..428e7ae5819 100644 --- a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py +++ b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py @@ -225,11 +225,7 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell): if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) self.loss_scalar("loss", loss) - ret = (loss, cond, scaling_sens) - - return F.depend(ret, succ) + return (loss, cond, scaling_sens) diff --git a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py 
b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py index df7ec1f42fe..15d38021b9e 100644 --- a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py +++ b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py @@ -22,7 +22,6 @@ from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits, L1Loss from mindspore.nn import Momentum from mindspore.ops import operations as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.common.initializer import HeNormal from mindspore.common.initializer import Normal from mindspore import Tensor @@ -382,7 +381,8 @@ class TrainStepWrap(nn.Cell): if not self.is_train: return loss grads = self.grad(self.network, weights)(x, labels1, labels2) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class TestStepWrap(nn.Cell): diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py index 6c7b0792742..59cf30efd0c 100644 --- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py +++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py @@ -59,7 +59,8 @@ class TrainOneStepD(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads_d = self.grad_reducer(grads_d) - return ops.depend(ld, self.optimizer(grads_d)) + self.optimizer(grads_d) + return ld class TrainOnestepG(nn.Cell): """ @@ -103,4 +104,5 @@ class TrainOnestepG(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads_g = self.grad_reducer(grads_g) - return ops.depend(lg, self.optimizer(grads_g)) + self.optimizer(grads_g) + return lg diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py index e9182b755e8..620ef823124 100644 --- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py +++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py @@ -59,5 +59,6 @@ class TrainOnestepPSNR(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return ops.depend(psnr_loss, self.optimizer(grads)) + self.optimizer(grads) + return psnr_loss \ No newline at end of file diff --git a/model_zoo/research/cv/STGAN/src/models/networks.py b/model_zoo/research/cv/STGAN/src/models/networks.py index da83c30c7c8..1cbd4cfd5a3 100644 --- a/model_zoo/research/cv/STGAN/src/models/networks.py +++ b/model_zoo/research/cv/STGAN/src/models/networks.py @@ -413,7 +413,8 @@ class TrainOneStepGenerator(nn.Cell): grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg, attr_diff, sens) grads = self.grad_reducer(grads) - return (ops.depend(loss_G, self.optimizer(grads)), fake_x, loss_G, + self.optimizer(grads) + return (loss_G, fake_x, loss_G, loss_fake_G, loss_cls_G, loss_rec_G, loss_adv_G) @@ -451,5 +452,6 @@ class TrainOneStepDiscriminator(nn.Cell): grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg, attr_diff, alpha, sens) grads = self.grad_reducer(grads) - return (ops.depend(loss_D, self.optimizer(grads)), loss_D, loss_real_D, + self.optimizer(grads) + return (loss_D, loss_D, loss_real_D, loss_fake_D, loss_cls_D, loss_gp_D, loss_adv_D, attr_diff) diff --git a/model_zoo/research/cv/advanced_east/src/model.py b/model_zoo/research/cv/advanced_east/src/model.py index 532ec8d8cba..29f78eb3cce 100644 --- a/model_zoo/research/cv/advanced_east/src/model.py +++ b/model_zoo/research/cv/advanced_east/src/model.py @@ -19,7 +19,6 @@ import mindspore import mindspore.nn as nn from mindspore.ops 
import operations as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import ResizeNearestNeighbor from mindspore import Tensor, ParameterTuple, Parameter from mindspore.common.initializer import initializer, TruncatedNormal @@ -410,7 +409,8 @@ class TrainStepWrap(nn.Cell): loss = self.network(image, label) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(image, label, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def get_AdvancedEast_net(args): diff --git a/model_zoo/research/cv/centernet/src/centernet_pose.py b/model_zoo/research/cv/centernet/src/centernet_pose.py index 929f658e481..a9a0322ee52 100644 --- a/model_zoo/research/cv/centernet/src/centernet_pose.py +++ b/model_zoo/research/cv/centernet/src/centernet_pose.py @@ -232,9 +232,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell): grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, kps, kps_mask, reg, hm_hp, hp_offset, hp_ind, hp_mask) - succ = self.optimizer(grads) - ret = loss - return ops.depend(ret, succ) + self.optimizer(grads) + return loss class CenterNetWithLossScaleCell(nn.Cell): @@ -309,9 +308,8 @@ class CenterNetWithLossScaleCell(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return ops.depend(ret, succ) + self.optimizer(grads) + return (loss, cond, scaling_sens) class CenterNetMultiPoseEval(nn.Cell): """ diff --git a/model_zoo/research/cv/centernet_det/src/centernet_det.py b/model_zoo/research/cv/centernet_det/src/centernet_det.py index 9ade7aa7418..c8bc5eaade7 100644 --- a/model_zoo/research/cv/centernet_det/src/centernet_det.py +++ b/model_zoo/research/cv/centernet_det/src/centernet_det.py @@ -250,9 +250,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell): weights = self.weights loss = self.network(image, hm, reg_mask, ind, wh, reg) grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, reg) - succ = self.optimizer(grads) - ret = loss - return ops.depend(ret, succ) + self.optimizer(grads) + return loss class CenterNetWithLossScaleCell(nn.Cell): @@ -320,12 +319,9 @@ class CenterNetWithLossScaleCell(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) overflow = cond - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return ops.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class CenterNetDetEval(nn.Cell): diff --git a/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py b/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py index cf762a10b2c..8425faeeb74 100644 --- a/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py +++ b/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py @@ -208,9 +208,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell): weights = self.weights loss = self.network(image, hm, reg_mask, ind, wh, reg) grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, reg) - succ = self.optimizer(grads) - ret = loss - return ops.depend(ret, succ) + self.optimizer(grads) + return loss class CenterNetWithLossScaleCell(nn.Cell): @@ -279,12 +278,9 @@ class CenterNetWithLossScaleCell(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) overflow = cond - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return ops.depend(ret, 
succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class CenterNetDetEval(nn.Cell): diff --git a/model_zoo/research/cv/dem/src/demnet.py b/model_zoo/research/cv/dem/src/demnet.py index 3ea6da1b37c..84c3d4ead06 100644 --- a/model_zoo/research/cv/dem/src/demnet.py +++ b/model_zoo/research/cv/dem/src/demnet.py @@ -125,4 +125,5 @@ class MyTrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(*inputs, sens) grads = self.grad_reducer(grads) grads = ops.clip_by_global_norm(grads, 0.2) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/midas/src/midas_net.py b/model_zoo/research/cv/midas/src/midas_net.py index fe2afed0a08..8df3c229e50 100644 --- a/model_zoo/research/cv/midas/src/midas_net.py +++ b/model_zoo/research/cv/midas/src/midas_net.py @@ -22,7 +22,6 @@ from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore.ops.operations import Add, Split, Concat from mindspore.nn.wrap.grad_reducer import DistributedGradReducer -from mindspore.ops import functional as F from src.custom_op import SEBlock, GroupConv from src.blocks_ms import Interpolate, FeatureFusionBlock from src.loss import ScaleAndShiftInvariantLoss @@ -390,4 +389,5 @@ class TrainOneStepCell(nn.Cell): if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py index b62bc8a6ac1..6b4dff20463 100644 --- a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py +++ b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py @@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class retinanetInferWithDecoder(nn.Cell): diff --git a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py index b62bc8a6ac1..6b4dff20463 100644 --- a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py +++ b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py @@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class retinanetInferWithDecoder(nn.Cell): diff --git a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py index c4c04105dd1..a57fcafb2d6 100644 --- a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py +++ b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py @@ -591,7 +591,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SSDWithGhostNet(nn.Cell): diff --git a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py index 7671660cbf3..ff5dfdfd9ef 100644 --- a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py +++ b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py @@ -388,7 +388,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, 
F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SSDWithMobileNetV2(nn.Cell): diff --git a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py index 15191e29c11..c9df5eb3c54 100644 --- a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py +++ b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py @@ -296,7 +296,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/ssd_resnet50/src/ssd.py b/model_zoo/research/cv/ssd_resnet50/src/ssd.py index 7edccbaf659..7ec90034385 100644 --- a/model_zoo/research/cv/ssd_resnet50/src/ssd.py +++ b/model_zoo/research/cv/ssd_resnet50/src/ssd.py @@ -457,7 +457,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SsdInferWithDecoder(nn.Cell): """ diff --git a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py index 63ac1af76df..60073bb1320 100644 --- a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py +++ b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py @@ -160,12 +160,9 @@ class GPT2FinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class GPT2LM(nn.Cell): diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py index 8d153ea3c67..47b0aeea88b 100644 --- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py +++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py @@ -365,12 +365,8 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) self.loss_scalar("loss", loss) - - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + return (loss, cond, scaling_sens) diff --git a/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py b/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py index 3de30f1a3b3..715c02ff1bf 100644 --- a/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py +++ b/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py @@ -21,7 +21,6 @@ from mindspore.common.initializer import initializer import mindspore.ops as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore import Parameter, ParameterTuple from mindspore import Tensor @@ -351,7 +350,8 @@ class TrainStepWrap(nn.Cell): grads = self.grad(self.network, weights)(cats_vals, num_vals, label, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return 
loss class ModelBuilder: diff --git a/model_zoo/research/recommend/autodis/src/autodis.py b/model_zoo/research/recommend/autodis/src/autodis.py index a0fcd3a2799..5b838dd1c15 100644 --- a/model_zoo/research/recommend/autodis/src/autodis.py +++ b/model_zoo/research/recommend/autodis/src/autodis.py @@ -18,7 +18,6 @@ import os import numpy as np from sklearn.metrics import roc_auc_score import mindspore.common.dtype as mstype -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore.nn import Dropout @@ -333,7 +332,8 @@ class TrainStepWrap(nn.Cell): loss = self.network(batch_ids, batch_wts, label) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) # grads = self.grad(self.network, weights)(batch_ids, batch_wts, label, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class PredictWithSigmoid(nn.Cell): diff --git a/tests/st/fl/albert/src/cell_wrapper.py b/tests/st/fl/albert/src/cell_wrapper.py index 477ddba3eba..187792c0543 100644 --- a/tests/st/fl/albert/src/cell_wrapper.py +++ b/tests/st/fl/albert/src/cell_wrapper.py @@ -295,5 +295,5 @@ class NetworkNoClientTrainCell(nn.Cell): self.cast(F.tuple_to_array((self.sens,)), mstype.float32)) grads = self.hyper_map(F.partial(clip_grad, self.clip_type, self.clip_value), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/tests/st/model_zoo_tests/yolov3/src/yolov3.py b/tests/st/model_zoo_tests/yolov3/src/yolov3.py index 7ddf3ae695f..643fe0be1d7 100644 --- a/tests/st/model_zoo_tests/yolov3/src/yolov3.py +++ b/tests/st/model_zoo_tests/yolov3/src/yolov3.py @@ -671,7 +671,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class YoloBoxScores(nn.Cell): diff --git a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py index 273a45302e9..8a073f0fb40 100644 --- a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py +++ b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py @@ -59,7 +59,7 @@ class YoloBlock(nn.Cell): Args: in_channels: Integer. Input channel. - out_chls: Interger. Middle channel. + out_chls: Integer. Middle channel. out_channels: Integer. Output channel. Returns: @@ -108,7 +108,7 @@ class YOLOv3(nn.Cell): Args: backbone_shape: List. Darknet output channels shape. backbone: Cell. Backbone Network. - out_channel: Interger. Output channel. + out_channel: Integer. Output channel. Returns: Tensor, output tensor. 
@@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/tests/st/networks/models/bert/src/bert_for_pre_training.py b/tests/st/networks/models/bert/src/bert_for_pre_training.py
index 0125875fd4f..a76ae7808f3 100644
--- a/tests/st/networks/models/bert/src/bert_for_pre_training.py
+++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py
@@ -321,8 +321,8 @@ class BertTrainOneStepCell(nn.Cell):
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
 
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 grad_scale = C.MultitypeFuncGraph("grad_scale")
@@ -431,9 +431,6 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
diff --git a/tests/st/networks/models/bert/src/utils.py b/tests/st/networks/models/bert/src/utils.py
index f76604ecfcf..2114dd12896 100644
--- a/tests/st/networks/models/bert/src/utils.py
+++ b/tests/st/networks/models/bert/src/utils.py
@@ -122,12 +122,9 @@ class BertFinetuneCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 
 class BertCLSModel(nn.Cell):
     """
diff --git a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py
index 04087cb0f0a..beec5d21b90 100644
--- a/tests/ut/python/exec/test_train_with_lars.py
+++ b/tests/ut/python/exec/test_train_with_lars.py
@@ -20,7 +20,6 @@ from mindspore.common.parameter import ParameterTuple, Parameter
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim import Momentum
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
@@ -67,10 +66,11 @@ class TrainOneStepWithLarsCell(nn.Cell):
         bias_grads = grads[self.slice_index: self.params_len]
         lars_grads = self.lars(non_bias_weights, non_bias_grads, self.weight_decay)
         new_grads = lars_grads + bias_grads
-        return F.depend(loss, self.optimizer(new_grads))
+        self.optimizer(new_grads)
+        return loss
 
 
-# fn is a funcation use i as input
+# fn is a function that takes i as input
 def lr_gen(fn, epoch_size):
     for i in range(epoch_size):
         yield fn(i)
diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py
index fbe8a7b0480..a662ff81567 100644
--- a/tests/ut/python/parallel/test_dataset_interface.py
+++ b/tests/ut/python/parallel/test_dataset_interface.py
@@ -21,7 +21,7 @@ from mindspore import context
 from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim.momentum import Momentum
-from mindspore.ops import composite as C, functional as F, operations as P
+from mindspore.ops import composite as C, operations as P
 from mindspore.train import Model
 from mindspore.context import ParallelMode
 from mindspore.train.loss_scale_manager import DynamicLossScaleManager
@@ -114,7 +114,8 @@ class TrainOneStepCell(nn.Cell):
         weights = self.weights
         loss = self.network(data)
         grads = self.grad(self.network, weights)(data, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def loss_scale_manager_sens(strategy1, sens):
diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py
index ab6a2a6283b..d307fb7a57e 100644
--- a/tests/ut/python/parallel/test_gather_v2_primitive.py
+++ b/tests/ut/python/parallel/test_gather_v2_primitive.py
@@ -25,7 +25,6 @@ from mindspore.nn import Dense, Cell
 from mindspore.nn.loss.loss import LossBase
 from mindspore.nn.optim import Momentum
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore.train import Model
 from mindspore.context import ParallelMode
@@ -121,7 +120,8 @@ class TrainOneStepCell(Cell):
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
         grads = self.grad(self.network, weights)(data, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def net_trains(criterion, rank):
diff --git a/tests/ut/python/parallel/test_loss_scale.py b/tests/ut/python/parallel/test_loss_scale.py
index c707e1bedf4..ebf10b68141 100644
--- a/tests/ut/python/parallel/test_loss_scale.py
+++ b/tests/ut/python/parallel/test_loss_scale.py
@@ -105,12 +105,9 @@ class TrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class DatasetLenet(MindData):
diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py
index 5db1eb409e2..9f1b81b057b 100644
--- a/tests/ut/python/parallel/test_reshape.py
+++ b/tests/ut/python/parallel/test_reshape.py
@@ -24,7 +24,6 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
 from mindspore.parallel import set_algo_parameters
@@ -419,7 +418,8 @@ class TrainOneStepCell(nn.Cell):
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
         grads = self.grad(self.network, weights)(data, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def reshape_common2(parallel_mode, net):
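
Every hunk in this changeset applies the same pattern: drop the F.depend that chained the optimizer's output to the returned value, run the optimizer as a plain statement, and return the loss (or status tuple) directly. The sketch below shows the resulting train-step convention in isolation; the class name TrainOneStepCellSketch and the network/optimizer arguments are placeholders and not part of this patch, and it assumes a MindSpore version whose automatic side-effect ordering sequences the parameter update before the return, which is what makes the explicit F.depend unnecessary.

# Minimal sketch of the post-change train-step convention (hypothetical
# names; not part of this patch).
import mindspore.nn as nn
from mindspore import ParameterTuple
from mindspore.ops import composite as C
from mindspore.ops import operations as P


class TrainOneStepCellSketch(nn.Cell):
    def __init__(self, network, optimizer, sens=1.0):
        super(TrainOneStepCellSketch, self).__init__(auto_prefix=False)
        self.network = network
        self.network.set_grad()
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens

    def construct(self, data, label):
        loss = self.network(data, label)
        # build the sensitivity tensor with the same dtype/shape as the loss
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = self.grad(self.network, self.weights)(data, label, sens)
        # before this changeset: return F.depend(loss, self.optimizer(grads))
        self.optimizer(grads)
        return loss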