!18667 Remove redundant depend

Merge pull request !18667 from huangbingjian/remove_redundant_depend
i-robot 2021-08-06 07:43:53 +00:00 committed by Gitee
commit 8a8851dc52
77 changed files with 252 additions and 344 deletions
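
Every file in this diff makes the same kind of change: a training step's construct that used to end with return F.depend(loss, self.optimizer(grads)) (or the ops.depend equivalent) now calls self.optimizer(grads) as a plain statement and returns its outputs directly, and the loss-scale cells drop the succ = False / succ = self.optimizer(grads) bookkeeping in favour of "if not overflow: self.optimizer(grads)". The sketch below is a hypothetical minimal train-step cell, not code taken from this PR; it assumes a MindSpore release whose graph compiler keeps the optimizer update in the compiled graph as a tracked side effect, which is what makes the explicit depend redundant.

# Hypothetical minimal sketch of the pattern, assuming side-effect tracking
# keeps the optimizer update in the compiled graph (not code from this PR).
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import ParameterTuple

class TrainOneStepCellSketch(nn.Cell):
    """Wraps a network and an optimizer into one training step."""
    def __init__(self, network, optimizer, sens=1.0):
        super(TrainOneStepCellSketch, self).__init__(auto_prefix=False)
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
        self.sens = sens

    def construct(self, data, label):
        loss = self.network(data, label)
        sens = ops.Fill()(ops.DType()(loss), ops.Shape()(loss), self.sens)
        grads = self.grad(self.network, self.weights)(data, label, sens)
        # Old pattern: chain the update to the returned loss so it could not
        # be pruned from the graph:
        #     return F.depend(loss, self.optimizer(grads))
        # New pattern: the bare call is kept as a side effect on its own.
        self.optimizer(grads)
        return loss

The loss-scale cells follow the same idea; a sketch of that variant appears after the first BertFinetuneCell/BertSquadCell hunks below. In the hunks that follow, removed lines are marked with a leading "-" and added lines with "+".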

View File

@ -310,8 +310,8 @@ class TrainingWrapper(nn.Cell):
else:
cond = self.less_equal(self.base, flag_sum)
-ret = (loss, cond, sens)
-return F.depend(ret, self.optimizer(grads))
+self.optimizer(grads)
+return (loss, cond, sens)
class CenterFaceWithNms(nn.Cell):

View File

@ -135,10 +135,8 @@ class CNNCTCTrainOneStepWithLossScaleCell(nn.Cell):
#apply grad reducer on grads
grads = self.grad_reducer(grads)
-success = self.optimizer(grads)
-ret = (loss, scaling_sens)
-return F.depend(ret, success)
+self.optimizer(grads)
+return (loss, scaling_sens)
class CNNCTC_Model(nn.Cell):

View File

@ -108,4 +108,5 @@ class TrainOneStepCellWithGradClip(Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -184,4 +184,5 @@ class TrainingWrapper(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -18,7 +18,6 @@ import time
import numpy as np
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore import ParameterTuple
from mindspore.train.callback import Callback
@ -140,4 +139,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(x, gt_bbox, gt_label, gt_num, img_shape, self.sens)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -18,7 +18,6 @@ import time
import numpy as np
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore import ParameterTuple
from mindspore.train.callback import Callback
@ -150,4 +149,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -18,7 +18,6 @@ import time
import numpy as np
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore import ParameterTuple
from mindspore.train.callback import Callback
@ -147,4 +146,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -18,7 +18,6 @@ import time
import numpy as np
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore import ParameterTuple
from mindspore.train.callback import Callback
@ -146,5 +145,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -177,7 +177,8 @@ class TrainOneStepCell(nn.Cell):
if self.reduce_flag:
grads = self.grad_reducer(grads)
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class MaskRcnn_Mobilenetv1_Infer(nn.Cell):
def __init__(self, config):

View File

@ -934,4 +934,5 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -199,4 +199,5 @@ class TrainOneStepWithClipGradientCell(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -23,7 +23,6 @@ from mindspore import ParameterTuple
from mindspore.common.tensor import Tensor
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.train.callback import Callback
__all__ = ['LossCallBack', 'WithLossCell', 'TrainOneStepCell']
@ -144,4 +143,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(img, gt_text, gt_kernels, training_mask, self.sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -19,7 +19,6 @@ import numpy as np
import mindspore
import mindspore.nn as nn
-from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore import context, Tensor
@ -524,4 +523,5 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -316,7 +316,8 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class resnet(nn.Cell):
"""

View File

@ -525,7 +525,8 @@ class TrainingWrapper(nn.Cell):
if self.use_global_norm:
grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
grads = C.clip_by_global_norm(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class SSDWithMobileNetV2(nn.Cell):

View File

@ -105,4 +105,5 @@ class TrainOneStepCellWithGradClip(Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -444,4 +444,5 @@ class TrainingWrapper(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -672,7 +672,8 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class YoloBoxScores(nn.Cell):

View File

@ -515,7 +515,8 @@ class TrainingWrapper(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class Giou(nn.Cell):

View File

@ -427,7 +427,8 @@ class TrainingWrapper(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class Giou(nn.Cell):

View File

@ -18,7 +18,6 @@ from mindspore.common.parameter import ParameterTuple
from mindspore import Tensor
from mindspore.common import dtype as mstype
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.ops import operations as P
@ -150,7 +149,8 @@ class TrainOneStepCell(nn.Cell):
loss = self.network(feature, biases)
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(feature, biases, sens)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class TrainGAT(nn.Cell):

View File

@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class BertSquadCell(nn.Cell):
"""
@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class BertCLS(nn.Cell):
"""

View File

@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell):
if self.enable_clip_grad:
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
grads = self.grad_reducer(grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
grad_scale = C.MultitypeFuncGraph("grad_scale")
@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell):
@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell)
overflow = cond
if self.loss_scaling_manager is not None:
overflow = self.loss_scaling_manager(scaling_sens, cond)
-succ = self.optimizer(grads, overflow)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+self.optimizer(grads, overflow)
+return (loss, cond, scaling_sens)
cast = P.Cast()
add_grads = C.MultitypeFuncGraph("add_grads")
@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
accu_overflow = self.select(overflow, self.one, self.zero)
self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
-if is_accu_step:
-succ = False
-else:
+if not is_accu_step:
# apply grad reducer on grads
grads = self.grad_reducer(self.accu_grads)
scaling = scaling_sens * self.degree * self.accumulation_steps
@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
overflow = self.reshape(overflow, (()))
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
-ret = (mean_loss, overflow, scaling_sens)
-return F.depend(ret, succ)
+return (mean_loss, overflow, scaling_sens)
class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell):

View File

@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell):
if self.enable_clip_grad:
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
grads = self.grad_reducer(grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
grad_scale = C.MultitypeFuncGraph("grad_scale")
@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell):
@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell)
overflow = cond
if self.loss_scaling_manager is not None:
overflow = self.loss_scaling_manager(scaling_sens, cond)
-succ = self.optimizer(grads, overflow)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+self.optimizer(grads, overflow)
+return (loss, cond, scaling_sens)
cast = P.Cast()
add_grads = C.MultitypeFuncGraph("add_grads")
@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
accu_overflow = self.select(overflow, self.one, self.zero)
self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
-if is_accu_step:
-succ = False
-else:
+if not is_accu_step:
# apply grad reducer on grads
grads = self.grad_reducer(self.accu_grads)
scaling = scaling_sens * self.degree * self.accumulation_steps
@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
overflow = self.reshape(overflow, (()))
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
-ret = (mean_loss, overflow, scaling_sens)
-return F.depend(ret, succ)
+return (mean_loss, overflow, scaling_sens)
class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell):

View File

@ -254,11 +254,9 @@ class CPMTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell):
cond = self.get_overflow_status(status, grads)
overflow = self.process_loss_scale(cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-return F.depend(loss, succ), cond, scaling_sens
+if not overflow:
+self.optimizer(grads)
+return loss, cond, scaling_sens
cast = P.Cast()
@ -352,7 +350,6 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell):
accu_overflow = self.select(overflow, self.one, self.zero)
if self.accumulation:
-succ = False
self.accu_overflow = accu_overflow
else:
my_zero = F.depend(self.zero, accu_overflow)
@ -378,9 +375,7 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell):
overflow = self.reshape(overflow, (()))
overflow = self.process_loss_scale(overflow)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
-return F.depend(loss, succ), overflow, scaling_sens
+return loss, overflow, scaling_sens

View File

@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class BertSquadCell(nn.Cell):
"""
@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class BertCLS(nn.Cell):
"""

View File

@ -308,8 +308,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell):
mstype.float32))
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
grads = self.grad_reducer(grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
grad_scale = C.MultitypeFuncGraph("grad_scale")
@ -397,12 +397,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell):
@ -472,9 +469,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell)
overflow = cond
if self.loss_scaling_manager is not None:
overflow = self.loss_scaling_manager(scaling_sens, cond)
-succ = self.optimizer(grads, overflow)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+self.optimizer(grads, overflow)
+return (loss, cond, scaling_sens)
cast = P.Cast()
add_grads = C.MultitypeFuncGraph("add_grads")
@ -631,9 +627,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
accu_overflow = self.select(overflow, self.one, self.zero)
self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
-if is_accu_step:
-succ = False
-else:
+if not is_accu_step:
# apply grad reducer on grads
grads = self.grad_reducer(self.accu_grads)
scaling = scaling_sens * self.degree * self.accumulation_steps
@ -650,13 +644,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
overflow = self.reshape(overflow, (()))
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
-ret = (mean_loss, overflow, scaling_sens)
-return F.depend(ret, succ)
+return (mean_loss, overflow, scaling_sens)
class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell):

View File

@ -172,12 +172,9 @@ class ErnieFinetuneCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class ErnieCLS(nn.Cell):
"""

View File

@ -138,5 +138,5 @@ class FastTextTrainOneStepCell(nn.Cell):
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss

View File

@ -284,9 +284,6 @@ class GNMTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)

View File

@ -151,9 +151,6 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)

View File

@ -234,12 +234,9 @@ class GRUTrainOneStepWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class GRUTrainOneStepCell(nn.TrainOneStepCell):
"""

View File

@ -368,10 +368,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+return (loss, cond, scaling_sens)

View File

@ -147,11 +147,9 @@ class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell):
overflow = self.process_loss_scale(cond)
# If overflow, surpass weights update
# if not, update weights
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-return F.depend(loss, succ), cond, scaling_sens
+if not overflow:
+self.optimizer(grads)
+return loss, cond, scaling_sens
class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
"""
@ -255,9 +253,6 @@ class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, overflow, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, overflow, scaling_sens)

View File

@ -212,12 +212,9 @@ class BertTrainWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BertTrainCell(nn.Cell):
@ -271,8 +268,8 @@ class BertTrainCell(nn.Cell):
# apply grad reducer on grads
grads = self.grad_reducer(grads)
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
class BertNetworkWithLoss_td(nn.Cell):
@ -451,12 +448,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class BertEvaluationCell(nn.Cell):
@ -507,5 +501,5 @@ class BertEvaluationCell(nn.Cell):
# apply grad reducer on grads
grads = self.grad_reducer(grads)
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss

View File

@ -285,12 +285,9 @@ class BertTrainWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BertTrainCell(nn.Cell):
"""
@ -343,8 +340,8 @@ class BertTrainCell(nn.Cell):
# apply grad reducer on grads
grads = self.grad_reducer(grads)
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
class BertNetworkWithLoss_td(nn.Cell):
"""
@ -551,12 +548,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BertEvaluationCell(nn.Cell):
@ -606,5 +600,5 @@ class BertEvaluationCell(nn.Cell):
# apply grad reducer on grads
grads = self.grad_reducer(grads)
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss

View File

@ -187,8 +187,8 @@ class TransformerTrainOneStepCell(nn.TrainOneStepCell):
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
grad_scale = C.MultitypeFuncGraph("grad_scale")
@ -277,12 +277,9 @@ class TransformerTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell)
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
cast = P.Cast()
@ -444,9 +441,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
accu_overflow = self.select(overflow, self.one, self.zero)
self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
-if is_accu_step:
-succ = False
-else:
+if not is_accu_step:
# apply grad reducer on grads
grads = self.grad_reducer(self.accu_grads)
scaling = scaling_sens * self.degree * self.accumulation_steps
@ -463,10 +458,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
overflow = self.reshape(overflow, (()))
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
-ret = (mean_loss, overflow, scaling_sens)
-return F.depend(ret, succ)
+return (mean_loss, overflow, scaling_sens)

View File

@ -20,7 +20,6 @@ from mindspore.nn.layer.activation import get_activation
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.common.initializer import initializer
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.context import ParallelMode
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
@ -261,7 +260,8 @@ class TrainStepWrap(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class PredictWithSigmoid(nn.Cell):

View File

@ -15,7 +15,6 @@
"""define network"""
import mindspore.nn as nn
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore import ParameterTuple
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
@ -83,4 +82,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.net_with_loss, weights)(data3, data2, data1, label)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -14,7 +14,6 @@
# ============================================================================
"""define pretrain network"""
import mindspore.nn as nn
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore import ParameterTuple
@ -85,4 +84,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.net_with_loss, weights)(data1, data2, data3, label)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -14,7 +14,6 @@
# ============================================================================
"""define training network"""
import mindspore.nn as nn
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore import ParameterTuple
@ -84,4 +83,5 @@ class TrainOneStepCell(nn.Cell):
grads = self.grad(self.net_with_loss, weights)(data, label)
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -116,7 +116,8 @@ class TrainOneStepCellGen(nn.Cell):
grads = self.grad(self.network, weights)(img_a, att_a, att_a_, att_b, att_b_, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads)), gf_loss, gc_loss, gr_loss
+self.optimizer(grads)
+return loss, gf_loss, gc_loss, gr_loss
class TrainOneStepCellDis(nn.Cell):
@ -152,4 +153,5 @@ class TrainOneStepCellDis(nn.Cell):
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads)), d_real_loss, d_fake_loss, dc_loss, df_gp
+self.optimizer(grads)
+return loss, d_real_loss, d_fake_loss, dc_loss, df_gp

View File

@ -138,10 +138,8 @@ class TrainOneStepWithLossScaleCell(nn.Cell):
else:
cond = self.less_equal(self.base, flag_sum)
-opt = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, opt)
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class BuildTrainNetworkV2(nn.Cell):

View File

@ -144,12 +144,9 @@ class IPTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class SupConLoss(nn.Cell):

View File

@ -23,7 +23,6 @@ from mindspore.common import dtype as mstype
from mindspore.context import ParallelMode
from mindspore.ops import operations as P
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
from mindspore.parallel._utils import _get_parallel_mode
from mindspore.train.serialization import save_checkpoint
@ -82,7 +81,8 @@ class MyTrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
def sub_mean(x):

View File

@ -225,11 +225,7 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell):
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
self.loss_scalar("loss", loss)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+return (loss, cond, scaling_sens)

View File

@ -22,7 +22,6 @@ from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits, L1Loss
from mindspore.nn import Momentum
from mindspore.ops import operations as P
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.common.initializer import HeNormal
from mindspore.common.initializer import Normal
from mindspore import Tensor
@ -382,7 +381,8 @@ class TrainStepWrap(nn.Cell):
if not self.is_train:
return loss
grads = self.grad(self.network, weights)(x, labels1, labels2)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class TestStepWrap(nn.Cell):

View File

@ -59,7 +59,8 @@ class TrainOneStepD(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads_d = self.grad_reducer(grads_d)
-return ops.depend(ld, self.optimizer(grads_d))
+self.optimizer(grads_d)
+return ld
class TrainOnestepG(nn.Cell):
"""
@ -103,4 +104,5 @@ class TrainOnestepG(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads_g = self.grad_reducer(grads_g)
-return ops.depend(lg, self.optimizer(grads_g))
+self.optimizer(grads_g)
+return lg

View File

@ -59,5 +59,6 @@ class TrainOnestepPSNR(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return ops.depend(psnr_loss, self.optimizer(grads))
+self.optimizer(grads)
+return psnr_loss

View File

@ -413,7 +413,8 @@ class TrainOneStepGenerator(nn.Cell):
grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg,
attr_diff, sens)
grads = self.grad_reducer(grads)
-return (ops.depend(loss_G, self.optimizer(grads)), fake_x, loss_G,
+self.optimizer(grads)
+return (loss_G, fake_x, loss_G,
loss_fake_G, loss_cls_G, loss_rec_G, loss_adv_G)
@ -451,5 +452,6 @@ class TrainOneStepDiscriminator(nn.Cell):
grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg,
attr_diff, alpha, sens)
grads = self.grad_reducer(grads)
-return (ops.depend(loss_D, self.optimizer(grads)), loss_D, loss_real_D,
+self.optimizer(grads)
+return (loss_D, loss_D, loss_real_D,
loss_fake_D, loss_cls_D, loss_gp_D, loss_adv_D, attr_diff)

View File

@ -19,7 +19,6 @@ import mindspore
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.ops import ResizeNearestNeighbor
from mindspore import Tensor, ParameterTuple, Parameter
from mindspore.common.initializer import initializer, TruncatedNormal
@ -410,7 +409,8 @@ class TrainStepWrap(nn.Cell):
loss = self.network(image, label)
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(image, label, sens)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
def get_AdvancedEast_net(args):

View File

@ -232,9 +232,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell):
grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, kps,
kps_mask, reg, hm_hp, hp_offset,
hp_ind, hp_mask)
-succ = self.optimizer(grads)
-ret = loss
-return ops.depend(ret, succ)
+self.optimizer(grads)
+return loss
class CenterNetWithLossScaleCell(nn.Cell):
@ -309,9 +308,8 @@ class CenterNetWithLossScaleCell(nn.Cell):
else:
cond = self.less_equal(self.base, flag_sum)
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return ops.depend(ret, succ)
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class CenterNetMultiPoseEval(nn.Cell):
"""

View File

@ -250,9 +250,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell):
weights = self.weights
loss = self.network(image, hm, reg_mask, ind, wh, reg)
grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, reg)
-succ = self.optimizer(grads)
-ret = loss
-return ops.depend(ret, succ)
+self.optimizer(grads)
+return loss
class CenterNetWithLossScaleCell(nn.Cell):
@ -320,12 +319,9 @@ class CenterNetWithLossScaleCell(nn.Cell):
else:
cond = self.less_equal(self.base, flag_sum)
overflow = cond
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return ops.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class CenterNetDetEval(nn.Cell):

View File

@ -208,9 +208,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell):
weights = self.weights
loss = self.network(image, hm, reg_mask, ind, wh, reg)
grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, reg)
-succ = self.optimizer(grads)
-ret = loss
-return ops.depend(ret, succ)
+self.optimizer(grads)
+return loss
class CenterNetWithLossScaleCell(nn.Cell):
@ -279,12 +278,9 @@ class CenterNetWithLossScaleCell(nn.Cell):
else:
cond = self.less_equal(self.base, flag_sum)
overflow = cond
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return ops.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class CenterNetDetEval(nn.Cell):

View File

@ -125,4 +125,5 @@ class MyTrainOneStepCell(nn.Cell):
grads = self.grad(self.network, weights)(*inputs, sens)
grads = self.grad_reducer(grads)
grads = ops.clip_by_global_norm(grads, 0.2)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -22,7 +22,6 @@ from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore.ops.operations import Add, Split, Concat
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
-from mindspore.ops import functional as F
from src.custom_op import SEBlock, GroupConv
from src.blocks_ms import Interpolate, FeatureFusionBlock
from src.loss import ScaleAndShiftInvariantLoss
@ -390,4 +389,5 @@ class TrainOneStepCell(nn.Cell):
if self.reduce_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class retinanetInferWithDecoder(nn.Cell):

View File

@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class retinanetInferWithDecoder(nn.Cell):

View File

@ -591,7 +591,8 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class SSDWithGhostNet(nn.Cell):

View File

@ -388,7 +388,8 @@ class TrainingWrapper(nn.Cell):
if self.use_global_norm:
grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
grads = C.clip_by_global_norm(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class SSDWithMobileNetV2(nn.Cell):

View File

@ -296,7 +296,8 @@ class TrainingWrapper(nn.Cell):
if self.use_global_norm:
grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
grads = C.clip_by_global_norm(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -457,7 +457,8 @@ class TrainingWrapper(nn.Cell):
if self.use_global_norm:
grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
grads = C.clip_by_global_norm(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class SsdInferWithDecoder(nn.Cell):
"""

View File

@ -160,12 +160,9 @@ class GPT2FinetuneCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class GPT2LM(nn.Cell):

View File

@ -365,12 +365,8 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
+if not overflow:
+self.optimizer(grads)
self.loss_scalar("loss", loss)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+return (loss, cond, scaling_sens)

View File

@ -21,7 +21,6 @@ from mindspore.common.initializer import initializer
import mindspore.ops as P
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore import Parameter, ParameterTuple
from mindspore import Tensor
@ -351,7 +350,8 @@ class TrainStepWrap(nn.Cell):
grads = self.grad(self.network, weights)(cats_vals, num_vals, label, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class ModelBuilder:

View File

@ -18,7 +18,6 @@ import os
import numpy as np
from sklearn.metrics import roc_auc_score
import mindspore.common.dtype as mstype
-from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.nn import Dropout
@ -333,7 +332,8 @@ class TrainStepWrap(nn.Cell):
loss = self.network(batch_ids, batch_wts, label)
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) #
grads = self.grad(self.network, weights)(batch_ids, batch_wts, label, sens)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class PredictWithSigmoid(nn.Cell):

View File

@ -295,5 +295,5 @@ class NetworkNoClientTrainCell(nn.Cell):
self.cast(F.tuple_to_array((self.sens,)),
mstype.float32))
grads = self.hyper_map(F.partial(clip_grad, self.clip_type, self.clip_value), grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss

View File

@ -671,7 +671,8 @@ class TrainingWrapper(nn.Cell):
if self.reducer_flag:
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
class YoloBoxScores(nn.Cell):

View File

@ -59,7 +59,7 @@ class YoloBlock(nn.Cell):
Args:
in_channels: Integer. Input channel.
-out_chls: Interger. Middle channel.
+out_chls: Integer. Middle channel.
out_channels: Integer. Output channel.
Returns:
@ -108,7 +108,7 @@ class YOLOv3(nn.Cell):
Args:
backbone_shape: List. Darknet output channels shape.
backbone: Cell. Backbone Network.
-out_channel: Interger. Output channel.
+out_channel: Integer. Output channel.
Returns:
Tensor, output tensor.
@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell):
grads = self.grad(self.network, weights)(*args, sens)
if self.reducer_flag:
grads = self.grad_reducer(grads)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss

View File

@ -321,8 +321,8 @@ class BertTrainOneStepCell(nn.Cell):
# apply grad reducer on grads
grads = self.grad_reducer(grads)
-succ = self.optimizer(grads)
-return F.depend(loss, succ)
+self.optimizer(grads)
+return loss
grad_scale = C.MultitypeFuncGraph("grad_scale")
@ -431,9 +431,6 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)

View File

@ -122,12 +122,9 @@ class BertFinetuneCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond)
class BertCLSModel(nn.Cell):
"""

View File

@ -20,7 +20,6 @@ from mindspore.common.parameter import ParameterTuple, Parameter
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.optim import Momentum
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.ops import operations as P
@ -67,10 +66,11 @@ class TrainOneStepWithLarsCell(nn.Cell):
bias_grads = grads[self.slice_index: self.params_len]
lars_grads = self.lars(non_bias_weights, non_bias_grads, self.weight_decay)
new_grads = lars_grads + bias_grads
-return F.depend(loss, self.optimizer(new_grads))
+self.optimizer(new_grads)
+return loss
-# fn is a funcation use i as input
+# fn is a function use i as input
def lr_gen(fn, epoch_size):
for i in range(epoch_size):
yield fn(i)

View File

@ -21,7 +21,7 @@ from mindspore import context
from mindspore.common.parameter import Parameter, ParameterTuple
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.optim.momentum import Momentum
-from mindspore.ops import composite as C, functional as F, operations as P
+from mindspore.ops import composite as C, operations as P
from mindspore.train import Model
from mindspore.context import ParallelMode
from mindspore.train.loss_scale_manager import DynamicLossScaleManager
@ -114,7 +114,8 @@ class TrainOneStepCell(nn.Cell):
weights = self.weights
loss = self.network(data)
grads = self.grad(self.network, weights)(data, sens)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
def loss_scale_manager_sens(strategy1, sens):

View File

@ -25,7 +25,6 @@ from mindspore.nn import Dense, Cell
from mindspore.nn.loss.loss import LossBase
from mindspore.nn.optim import Momentum
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.train import Model
from mindspore.context import ParallelMode
@ -121,7 +120,8 @@ class TrainOneStepCell(Cell):
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(data, sens)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
def net_trains(criterion, rank):

View File

@ -105,12 +105,9 @@ class TrainOneStepWithLossScaleCell(nn.Cell):
overflow = cond
if sens is None:
overflow = self.loss_scaling_manager(self.loss_scale, cond)
-if overflow:
-succ = False
-else:
-succ = self.optimizer(grads)
-ret = (loss, cond, scaling_sens)
-return F.depend(ret, succ)
+if not overflow:
+self.optimizer(grads)
+return (loss, cond, scaling_sens)
class DatasetLenet(MindData):

View File

@ -24,7 +24,6 @@ from mindspore.common.parameter import ParameterTuple
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.optim.momentum import Momentum
from mindspore.ops import composite as C
-from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
from mindspore.parallel import set_algo_parameters
@ -419,7 +418,8 @@ class TrainOneStepCell(nn.Cell):
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(data, sens)
-return F.depend(loss, self.optimizer(grads))
+self.optimizer(grads)
+return loss
def reshape_common2(parallel_mode, net):