forked from mindspore-Ecosystem/mindspore
!13526 modify network_define for fasterrcnn/maskrcnn/maskrcnn_mobilenetv/deeptext
From: @huangbingjian Reviewed-by: @zh_qh,@zhunaipan Signed-off-by: @zh_qh
This commit is contained in:
commit
eaecc83ec2
|
@ -47,12 +47,7 @@ class LossCallBack(Callback):
|
||||||
raise ValueError("print_step must be int and >= 0.")
|
raise ValueError("print_step must be int and >= 0.")
|
||||||
self._per_print_times = per_print_times
|
self._per_print_times = per_print_times
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
self.rank_id = rank_id
|
self.rank_id = rank_id
|
||||||
|
|
||||||
global time_stamp_init, time_stamp_first
|
global time_stamp_init, time_stamp_first
|
||||||
|
@ -62,54 +57,26 @@ class LossCallBack(Callback):
|
||||||
|
|
||||||
def step_end(self, run_context):
|
def step_end(self, run_context):
|
||||||
cb_params = run_context.original_args()
|
cb_params = run_context.original_args()
|
||||||
rpn_loss = cb_params.net_outputs[0].asnumpy()
|
loss = cb_params.net_outputs.asnumpy()
|
||||||
rcnn_loss = cb_params.net_outputs[1].asnumpy()
|
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
||||||
rpn_cls_loss = cb_params.net_outputs[2].asnumpy()
|
|
||||||
|
|
||||||
rpn_reg_loss = cb_params.net_outputs[3].asnumpy()
|
|
||||||
rcnn_cls_loss = cb_params.net_outputs[4].asnumpy()
|
|
||||||
rcnn_reg_loss = cb_params.net_outputs[5].asnumpy()
|
|
||||||
|
|
||||||
self.count += 1
|
self.count += 1
|
||||||
self.rpn_loss_sum += float(rpn_loss)
|
self.loss_sum += float(loss)
|
||||||
self.rcnn_loss_sum += float(rcnn_loss)
|
|
||||||
self.rpn_cls_loss_sum += float(rpn_cls_loss)
|
|
||||||
self.rpn_reg_loss_sum += float(rpn_reg_loss)
|
|
||||||
self.rcnn_cls_loss_sum += float(rcnn_cls_loss)
|
|
||||||
self.rcnn_reg_loss_sum += float(rcnn_reg_loss)
|
|
||||||
|
|
||||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
|
||||||
|
|
||||||
if self.count >= 1:
|
if self.count >= 1:
|
||||||
global time_stamp_first
|
global time_stamp_first
|
||||||
time_stamp_current = time.time()
|
time_stamp_current = time.time()
|
||||||
|
total_loss = self.loss_sum / self.count
|
||||||
rpn_loss = self.rpn_loss_sum / self.count
|
|
||||||
rcnn_loss = self.rcnn_loss_sum / self.count
|
|
||||||
rpn_cls_loss = self.rpn_cls_loss_sum / self.count
|
|
||||||
|
|
||||||
rpn_reg_loss = self.rpn_reg_loss_sum / self.count
|
|
||||||
rcnn_cls_loss = self.rcnn_cls_loss_sum / self.count
|
|
||||||
rcnn_reg_loss = self.rcnn_reg_loss_sum / self.count
|
|
||||||
|
|
||||||
total_loss = rpn_loss + rcnn_loss
|
|
||||||
|
|
||||||
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
||||||
loss_file.write("%lu epoch: %s step: %s ,rpn_loss: %.5f, rcnn_loss: %.5f, rpn_cls_loss: %.5f, "
|
loss_file.write("%lu epoch: %s step: %s ,total_loss: %.5f" %
|
||||||
"rpn_reg_loss: %.5f, rcnn_cls_loss: %.5f, rcnn_reg_loss: %.5f, total_loss: %.5f" %
|
|
||||||
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
||||||
rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss,
|
total_loss))
|
||||||
rcnn_cls_loss, rcnn_reg_loss, total_loss))
|
|
||||||
loss_file.write("\n")
|
loss_file.write("\n")
|
||||||
loss_file.close()
|
loss_file.close()
|
||||||
|
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
|
|
||||||
|
|
||||||
class LossNet(nn.Cell):
|
class LossNet(nn.Cell):
|
||||||
|
@ -157,7 +124,6 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
network (Cell): The training network.
|
network (Cell): The training network.
|
||||||
network_backbone (Cell): The forward network.
|
|
||||||
optimizer (Cell): Optimizer for updating the weights.
|
optimizer (Cell): Optimizer for updating the weights.
|
||||||
sens (Number): The adjust parameter. Default value is 1.0.
|
sens (Number): The adjust parameter. Default value is 1.0.
|
||||||
reduce_flag (bool): The reduce flag. Default value is False.
|
reduce_flag (bool): The reduce flag. Default value is False.
|
||||||
|
@ -165,11 +131,10 @@ class TrainOneStepCell(nn.Cell):
|
||||||
degree (int): Device number. Default value is None.
|
degree (int): Device number. Default value is None.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, network, network_backbone, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
def __init__(self, network, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
||||||
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
||||||
self.network = network
|
self.network = network
|
||||||
self.network.set_grad()
|
self.network.set_grad()
|
||||||
self.backbone = network_backbone
|
|
||||||
self.weights = ParameterTuple(network.trainable_params())
|
self.weights = ParameterTuple(network.trainable_params())
|
||||||
self.optimizer = optimizer
|
self.optimizer = optimizer
|
||||||
self.grad = C.GradOperation(get_by_list=True,
|
self.grad = C.GradOperation(get_by_list=True,
|
||||||
|
@ -181,8 +146,8 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num):
|
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num):
|
||||||
weights = self.weights
|
weights = self.weights
|
||||||
loss1, loss2, loss3, loss4, loss5, loss6 = self.backbone(x, img_shape, gt_bboxe, gt_label, gt_num)
|
loss = self.network(x, img_shape, gt_bboxe, gt_label, gt_num)
|
||||||
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
|
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
|
||||||
if self.reduce_flag:
|
if self.reduce_flag:
|
||||||
grads = self.grad_reducer(grads)
|
grads = self.grad_reducer(grads)
|
||||||
return F.depend(loss1, self.optimizer(grads)), loss2, loss3, loss4, loss5, loss6
|
return F.depend(loss, self.optimizer(grads))
|
||||||
|
|
|
@ -120,10 +120,10 @@ if __name__ == '__main__':
|
||||||
weight_decay=config.weight_decay, loss_scale=config.loss_scale)
|
weight_decay=config.weight_decay, loss_scale=config.loss_scale)
|
||||||
net_with_loss = WithLossCell(net, loss)
|
net_with_loss = WithLossCell(net, loss)
|
||||||
if args_opt.run_distribute:
|
if args_opt.run_distribute:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True,
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale, reduce_flag=True,
|
||||||
mean=True, degree=device_num)
|
mean=True, degree=device_num)
|
||||||
else:
|
else:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale)
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale)
|
||||||
|
|
||||||
time_cb = TimeMonitor(data_size=dataset_size)
|
time_cb = TimeMonitor(data_size=dataset_size)
|
||||||
loss_cb = LossCallBack(rank_id=rank)
|
loss_cb = LossCallBack(rank_id=rank)
|
||||||
|
|
|
@ -47,12 +47,7 @@ class LossCallBack(Callback):
|
||||||
raise ValueError("print_step must be int and >= 0.")
|
raise ValueError("print_step must be int and >= 0.")
|
||||||
self._per_print_times = per_print_times
|
self._per_print_times = per_print_times
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
self.rank_id = rank_id
|
self.rank_id = rank_id
|
||||||
|
|
||||||
global time_stamp_init, time_stamp_first
|
global time_stamp_init, time_stamp_first
|
||||||
|
@ -62,54 +57,26 @@ class LossCallBack(Callback):
|
||||||
|
|
||||||
def step_end(self, run_context):
|
def step_end(self, run_context):
|
||||||
cb_params = run_context.original_args()
|
cb_params = run_context.original_args()
|
||||||
rpn_loss = cb_params.net_outputs[0].asnumpy()
|
loss = cb_params.net_outputs.asnumpy()
|
||||||
rcnn_loss = cb_params.net_outputs[1].asnumpy()
|
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
||||||
rpn_cls_loss = cb_params.net_outputs[2].asnumpy()
|
|
||||||
|
|
||||||
rpn_reg_loss = cb_params.net_outputs[3].asnumpy()
|
|
||||||
rcnn_cls_loss = cb_params.net_outputs[4].asnumpy()
|
|
||||||
rcnn_reg_loss = cb_params.net_outputs[5].asnumpy()
|
|
||||||
|
|
||||||
self.count += 1
|
self.count += 1
|
||||||
self.rpn_loss_sum += float(rpn_loss)
|
self.loss_sum += float(loss)
|
||||||
self.rcnn_loss_sum += float(rcnn_loss)
|
|
||||||
self.rpn_cls_loss_sum += float(rpn_cls_loss)
|
|
||||||
self.rpn_reg_loss_sum += float(rpn_reg_loss)
|
|
||||||
self.rcnn_cls_loss_sum += float(rcnn_cls_loss)
|
|
||||||
self.rcnn_reg_loss_sum += float(rcnn_reg_loss)
|
|
||||||
|
|
||||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
|
||||||
|
|
||||||
if self.count >= 1:
|
if self.count >= 1:
|
||||||
global time_stamp_first
|
global time_stamp_first
|
||||||
time_stamp_current = time.time()
|
time_stamp_current = time.time()
|
||||||
|
total_loss = self.loss_sum / self.count
|
||||||
rpn_loss = self.rpn_loss_sum/self.count
|
|
||||||
rcnn_loss = self.rcnn_loss_sum/self.count
|
|
||||||
rpn_cls_loss = self.rpn_cls_loss_sum/self.count
|
|
||||||
|
|
||||||
rpn_reg_loss = self.rpn_reg_loss_sum/self.count
|
|
||||||
rcnn_cls_loss = self.rcnn_cls_loss_sum/self.count
|
|
||||||
rcnn_reg_loss = self.rcnn_reg_loss_sum/self.count
|
|
||||||
|
|
||||||
total_loss = rpn_loss + rcnn_loss
|
|
||||||
|
|
||||||
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
||||||
loss_file.write("%lu epoch: %s step: %s ,rpn_loss: %.5f, rcnn_loss: %.5f, rpn_cls_loss: %.5f, "
|
loss_file.write("%lu epoch: %s step: %s ,total_loss: %.5f" %
|
||||||
"rpn_reg_loss: %.5f, rcnn_cls_loss: %.5f, rcnn_reg_loss: %.5f, total_loss: %.5f" %
|
|
||||||
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
||||||
rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss,
|
total_loss))
|
||||||
rcnn_cls_loss, rcnn_reg_loss, total_loss))
|
|
||||||
loss_file.write("\n")
|
loss_file.write("\n")
|
||||||
loss_file.close()
|
loss_file.close()
|
||||||
|
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
|
|
||||||
|
|
||||||
class LossNet(nn.Cell):
|
class LossNet(nn.Cell):
|
||||||
|
@ -155,18 +122,16 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
network (Cell): The training network.
|
network (Cell): The training network.
|
||||||
network_backbone (Cell): The forward network.
|
|
||||||
optimizer (Cell): Optimizer for updating the weights.
|
optimizer (Cell): Optimizer for updating the weights.
|
||||||
sens (Number): The adjust parameter. Default value is 1.0.
|
sens (Number): The adjust parameter. Default value is 1.0.
|
||||||
reduce_flag (bool): The reduce flag. Default value is False.
|
reduce_flag (bool): The reduce flag. Default value is False.
|
||||||
mean (bool): Allreduce method. Default value is False.
|
mean (bool): Allreduce method. Default value is False.
|
||||||
degree (int): Device number. Default value is None.
|
degree (int): Device number. Default value is None.
|
||||||
"""
|
"""
|
||||||
def __init__(self, network, network_backbone, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
def __init__(self, network, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
||||||
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
||||||
self.network = network
|
self.network = network
|
||||||
self.network.set_grad()
|
self.network.set_grad()
|
||||||
self.backbone = network_backbone
|
|
||||||
self.weights = ParameterTuple(network.trainable_params())
|
self.weights = ParameterTuple(network.trainable_params())
|
||||||
self.optimizer = optimizer
|
self.optimizer = optimizer
|
||||||
self.grad = C.GradOperation(get_by_list=True,
|
self.grad = C.GradOperation(get_by_list=True,
|
||||||
|
@ -178,8 +143,8 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num):
|
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num):
|
||||||
weights = self.weights
|
weights = self.weights
|
||||||
loss1, loss2, loss3, loss4, loss5, loss6 = self.backbone(x, img_shape, gt_bboxe, gt_label, gt_num)
|
loss = self.network(x, img_shape, gt_bboxe, gt_label, gt_num)
|
||||||
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
|
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
|
||||||
if self.reduce_flag:
|
if self.reduce_flag:
|
||||||
grads = self.grad_reducer(grads)
|
grads = self.grad_reducer(grads)
|
||||||
return F.depend(loss1, self.optimizer(grads)), loss2, loss3, loss4, loss5, loss6
|
return F.depend(loss, self.optimizer(grads))
|
||||||
|
|
|
@ -159,10 +159,10 @@ if __name__ == '__main__':
|
||||||
weight_decay=config.weight_decay, loss_scale=config.loss_scale)
|
weight_decay=config.weight_decay, loss_scale=config.loss_scale)
|
||||||
net_with_loss = WithLossCell(net, loss)
|
net_with_loss = WithLossCell(net, loss)
|
||||||
if args_opt.run_distribute:
|
if args_opt.run_distribute:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True,
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale, reduce_flag=True,
|
||||||
mean=True, degree=device_num)
|
mean=True, degree=device_num)
|
||||||
else:
|
else:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale)
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale)
|
||||||
|
|
||||||
time_cb = TimeMonitor(data_size=dataset_size)
|
time_cb = TimeMonitor(data_size=dataset_size)
|
||||||
loss_cb = LossCallBack(rank_id=rank)
|
loss_cb = LossCallBack(rank_id=rank)
|
||||||
|
|
|
@ -46,13 +46,7 @@ class LossCallBack(Callback):
|
||||||
raise ValueError("print_step must be int and >= 0.")
|
raise ValueError("print_step must be int and >= 0.")
|
||||||
self._per_print_times = per_print_times
|
self._per_print_times = per_print_times
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
self.rcnn_mask_loss_sum = 0
|
|
||||||
self.rank_id = rank_id
|
self.rank_id = rank_id
|
||||||
|
|
||||||
global time_stamp_init, time_stamp_first
|
global time_stamp_init, time_stamp_first
|
||||||
|
@ -62,59 +56,26 @@ class LossCallBack(Callback):
|
||||||
|
|
||||||
def step_end(self, run_context):
|
def step_end(self, run_context):
|
||||||
cb_params = run_context.original_args()
|
cb_params = run_context.original_args()
|
||||||
rpn_loss = cb_params.net_outputs[0].asnumpy()
|
loss = cb_params.net_outputs.asnumpy()
|
||||||
rcnn_loss = cb_params.net_outputs[1].asnumpy()
|
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
||||||
rpn_cls_loss = cb_params.net_outputs[2].asnumpy()
|
|
||||||
|
|
||||||
rpn_reg_loss = cb_params.net_outputs[3].asnumpy()
|
|
||||||
rcnn_cls_loss = cb_params.net_outputs[4].asnumpy()
|
|
||||||
rcnn_reg_loss = cb_params.net_outputs[5].asnumpy()
|
|
||||||
rcnn_mask_loss = cb_params.net_outputs[6].asnumpy()
|
|
||||||
|
|
||||||
self.count += 1
|
self.count += 1
|
||||||
self.rpn_loss_sum += float(rpn_loss)
|
self.loss_sum += float(loss)
|
||||||
self.rcnn_loss_sum += float(rcnn_loss)
|
|
||||||
self.rpn_cls_loss_sum += float(rpn_cls_loss)
|
|
||||||
self.rpn_reg_loss_sum += float(rpn_reg_loss)
|
|
||||||
self.rcnn_cls_loss_sum += float(rcnn_cls_loss)
|
|
||||||
self.rcnn_reg_loss_sum += float(rcnn_reg_loss)
|
|
||||||
self.rcnn_mask_loss_sum += float(rcnn_mask_loss)
|
|
||||||
|
|
||||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
|
||||||
|
|
||||||
if self.count >= 1:
|
if self.count >= 1:
|
||||||
global time_stamp_first
|
global time_stamp_first
|
||||||
time_stamp_current = time.time()
|
time_stamp_current = time.time()
|
||||||
|
total_loss = self.loss_sum / self.count
|
||||||
rpn_loss = self.rpn_loss_sum/self.count
|
|
||||||
rcnn_loss = self.rcnn_loss_sum/self.count
|
|
||||||
rpn_cls_loss = self.rpn_cls_loss_sum/self.count
|
|
||||||
|
|
||||||
rpn_reg_loss = self.rpn_reg_loss_sum/self.count
|
|
||||||
rcnn_cls_loss = self.rcnn_cls_loss_sum/self.count
|
|
||||||
rcnn_reg_loss = self.rcnn_reg_loss_sum/self.count
|
|
||||||
rcnn_mask_loss = self.rcnn_mask_loss_sum/self.count
|
|
||||||
|
|
||||||
total_loss = rpn_loss + rcnn_loss
|
|
||||||
|
|
||||||
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
||||||
loss_file.write("%lu epoch: %s step: %s ,rpn_loss: %.5f, rcnn_loss: %.5f, rpn_cls_loss: %.5f, "
|
loss_file.write("%lu epoch: %s step: %s ,total_loss: %.5f" %
|
||||||
"rpn_reg_loss: %.5f, rcnn_cls_loss: %.5f, rcnn_reg_loss: %.5f, rcnn_mask_loss: %.5f, "
|
|
||||||
"total_loss: %.5f" %
|
|
||||||
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
||||||
rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss,
|
total_loss))
|
||||||
rcnn_cls_loss, rcnn_reg_loss, rcnn_mask_loss, total_loss))
|
|
||||||
loss_file.write("\n")
|
loss_file.write("\n")
|
||||||
loss_file.close()
|
loss_file.close()
|
||||||
|
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
self.rcnn_mask_loss_sum = 0
|
|
||||||
|
|
||||||
class LossNet(nn.Cell):
|
class LossNet(nn.Cell):
|
||||||
"""MaskRcnn loss method"""
|
"""MaskRcnn loss method"""
|
||||||
|
@ -159,18 +120,16 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
network (Cell): The training network.
|
network (Cell): The training network.
|
||||||
network_backbone (Cell): The forward network.
|
|
||||||
optimizer (Cell): Optimizer for updating the weights.
|
optimizer (Cell): Optimizer for updating the weights.
|
||||||
sens (Number): The adjust parameter. Default value is 1.0.
|
sens (Number): The adjust parameter. Default value is 1.0.
|
||||||
reduce_flag (bool): The reduce flag. Default value is False.
|
reduce_flag (bool): The reduce flag. Default value is False.
|
||||||
mean (bool): Allreduce method. Default value is False.
|
mean (bool): Allreduce method. Default value is False.
|
||||||
degree (int): Device number. Default value is None.
|
degree (int): Device number. Default value is None.
|
||||||
"""
|
"""
|
||||||
def __init__(self, network, network_backbone, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
def __init__(self, network, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
||||||
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
||||||
self.network = network
|
self.network = network
|
||||||
self.network.set_grad()
|
self.network.set_grad()
|
||||||
self.backbone = network_backbone
|
|
||||||
self.weights = ParameterTuple(network.trainable_params())
|
self.weights = ParameterTuple(network.trainable_params())
|
||||||
self.optimizer = optimizer
|
self.optimizer = optimizer
|
||||||
self.grad = C.GradOperation(get_by_list=True,
|
self.grad = C.GradOperation(get_by_list=True,
|
||||||
|
@ -183,10 +142,9 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask):
|
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask):
|
||||||
weights = self.weights
|
weights = self.weights
|
||||||
loss1, loss2, loss3, loss4, loss5, loss6, loss7 = self.backbone(x, img_shape, gt_bboxe, gt_label,
|
loss = self.network(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask)
|
||||||
gt_num, gt_mask)
|
|
||||||
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens)
|
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens)
|
||||||
if self.reduce_flag:
|
if self.reduce_flag:
|
||||||
grads = self.grad_reducer(grads)
|
grads = self.grad_reducer(grads)
|
||||||
|
|
||||||
return F.depend(loss1, self.optimizer(grads)), loss2, loss3, loss4, loss5, loss6, loss7
|
return F.depend(loss, self.optimizer(grads))
|
||||||
|
|
|
@ -124,10 +124,10 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
net_with_loss = WithLossCell(net, loss)
|
net_with_loss = WithLossCell(net, loss)
|
||||||
if args_opt.run_distribute:
|
if args_opt.run_distribute:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True,
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale, reduce_flag=True,
|
||||||
mean=True, degree=device_num)
|
mean=True, degree=device_num)
|
||||||
else:
|
else:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale)
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale)
|
||||||
|
|
||||||
time_cb = TimeMonitor(data_size=dataset_size)
|
time_cb = TimeMonitor(data_size=dataset_size)
|
||||||
loss_cb = LossCallBack(rank_id=rank)
|
loss_cb = LossCallBack(rank_id=rank)
|
||||||
|
|
|
@ -75,13 +75,7 @@ class LossCallBack(Callback):
|
||||||
raise ValueError("print_step must be int and >= 0.")
|
raise ValueError("print_step must be int and >= 0.")
|
||||||
self._per_print_times = per_print_times
|
self._per_print_times = per_print_times
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
self.rcnn_mask_loss_sum = 0
|
|
||||||
self.rank_id = rank_id
|
self.rank_id = rank_id
|
||||||
|
|
||||||
global time_stamp_init, time_stamp_first
|
global time_stamp_init, time_stamp_first
|
||||||
|
@ -91,59 +85,26 @@ class LossCallBack(Callback):
|
||||||
|
|
||||||
def step_end(self, run_context):
|
def step_end(self, run_context):
|
||||||
cb_params = run_context.original_args()
|
cb_params = run_context.original_args()
|
||||||
rpn_loss = cb_params.net_outputs[0].asnumpy()
|
loss = cb_params.net_outputs.asnumpy()
|
||||||
rcnn_loss = cb_params.net_outputs[1].asnumpy()
|
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
||||||
rpn_cls_loss = cb_params.net_outputs[2].asnumpy()
|
|
||||||
|
|
||||||
rpn_reg_loss = cb_params.net_outputs[3].asnumpy()
|
|
||||||
rcnn_cls_loss = cb_params.net_outputs[4].asnumpy()
|
|
||||||
rcnn_reg_loss = cb_params.net_outputs[5].asnumpy()
|
|
||||||
rcnn_mask_loss = cb_params.net_outputs[6].asnumpy()
|
|
||||||
|
|
||||||
self.count += 1
|
self.count += 1
|
||||||
self.rpn_loss_sum += float(rpn_loss)
|
self.loss_sum += float(loss)
|
||||||
self.rcnn_loss_sum += float(rcnn_loss)
|
|
||||||
self.rpn_cls_loss_sum += float(rpn_cls_loss)
|
|
||||||
self.rpn_reg_loss_sum += float(rpn_reg_loss)
|
|
||||||
self.rcnn_cls_loss_sum += float(rcnn_cls_loss)
|
|
||||||
self.rcnn_reg_loss_sum += float(rcnn_reg_loss)
|
|
||||||
self.rcnn_mask_loss_sum += float(rcnn_mask_loss)
|
|
||||||
|
|
||||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
|
|
||||||
|
|
||||||
if self.count >= 1:
|
if self.count >= 1:
|
||||||
global time_stamp_first
|
global time_stamp_first
|
||||||
time_stamp_current = time.time()
|
time_stamp_current = time.time()
|
||||||
|
total_loss = self.loss_sum/self.count
|
||||||
rpn_loss = self.rpn_loss_sum/self.count
|
|
||||||
rcnn_loss = self.rcnn_loss_sum/self.count
|
|
||||||
rpn_cls_loss = self.rpn_cls_loss_sum/self.count
|
|
||||||
|
|
||||||
rpn_reg_loss = self.rpn_reg_loss_sum/self.count
|
|
||||||
rcnn_cls_loss = self.rcnn_cls_loss_sum/self.count
|
|
||||||
rcnn_reg_loss = self.rcnn_reg_loss_sum/self.count
|
|
||||||
rcnn_mask_loss = self.rcnn_mask_loss_sum/self.count
|
|
||||||
|
|
||||||
total_loss = rpn_loss + rcnn_loss
|
|
||||||
|
|
||||||
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
loss_file = open("./loss_{}.log".format(self.rank_id), "a+")
|
||||||
loss_file.write("%lu epoch: %s step: %s ,rpn_loss: %.5f, rcnn_loss: %.5f, rpn_cls_loss: %.5f, "
|
loss_file.write("%lu epoch: %s step: %s ,total_loss: %.5f" %
|
||||||
"rpn_reg_loss: %.5f, rcnn_cls_loss: %.5f, rcnn_reg_loss: %.5f, rcnn_mask_loss: %.5f, "
|
|
||||||
"total_loss: %.5f" %
|
|
||||||
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
|
||||||
rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss,
|
total_loss))
|
||||||
rcnn_cls_loss, rcnn_reg_loss, rcnn_mask_loss, total_loss))
|
|
||||||
loss_file.write("\n")
|
loss_file.write("\n")
|
||||||
loss_file.close()
|
loss_file.close()
|
||||||
|
|
||||||
self.count = 0
|
self.count = 0
|
||||||
self.rpn_loss_sum = 0
|
self.loss_sum = 0
|
||||||
self.rcnn_loss_sum = 0
|
|
||||||
self.rpn_cls_loss_sum = 0
|
|
||||||
self.rpn_reg_loss_sum = 0
|
|
||||||
self.rcnn_cls_loss_sum = 0
|
|
||||||
self.rcnn_reg_loss_sum = 0
|
|
||||||
self.rcnn_mask_loss_sum = 0
|
|
||||||
|
|
||||||
class LossNet(nn.Cell):
|
class LossNet(nn.Cell):
|
||||||
"""MaskRcnn loss method"""
|
"""MaskRcnn loss method"""
|
||||||
|
@ -188,18 +149,16 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
network (Cell): The training network.
|
network (Cell): The training network.
|
||||||
network_backbone (Cell): The forward network.
|
|
||||||
optimizer (Cell): Optimizer for updating the weights.
|
optimizer (Cell): Optimizer for updating the weights.
|
||||||
sens (Number): The adjust parameter. Default value is 1.0.
|
sens (Number): The adjust parameter. Default value is 1.0.
|
||||||
reduce_flag (bool): The reduce flag. Default value is False.
|
reduce_flag (bool): The reduce flag. Default value is False.
|
||||||
mean (bool): Allreduce method. Default value is False.
|
mean (bool): Allreduce method. Default value is False.
|
||||||
degree (int): Device number. Default value is None.
|
degree (int): Device number. Default value is None.
|
||||||
"""
|
"""
|
||||||
def __init__(self, network, network_backbone, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
def __init__(self, network, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
|
||||||
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
super(TrainOneStepCell, self).__init__(auto_prefix=False)
|
||||||
self.network = network
|
self.network = network
|
||||||
self.network.set_grad()
|
self.network.set_grad()
|
||||||
self.backbone = network_backbone
|
|
||||||
self.weights = ParameterTuple(network.trainable_params())
|
self.weights = ParameterTuple(network.trainable_params())
|
||||||
self.optimizer = optimizer
|
self.optimizer = optimizer
|
||||||
self.grad = C.GradOperation(get_by_list=True,
|
self.grad = C.GradOperation(get_by_list=True,
|
||||||
|
@ -212,10 +171,9 @@ class TrainOneStepCell(nn.Cell):
|
||||||
|
|
||||||
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask):
|
def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask):
|
||||||
weights = self.weights
|
weights = self.weights
|
||||||
loss1, loss2, loss3, loss4, loss5, loss6, loss7 = self.backbone(x, img_shape, gt_bboxe, gt_label,
|
loss = self.network(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask)
|
||||||
gt_num, gt_mask)
|
|
||||||
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens)
|
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens)
|
||||||
if self.reduce_flag:
|
if self.reduce_flag:
|
||||||
grads = self.grad_reducer(grads)
|
grads = self.grad_reducer(grads)
|
||||||
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
|
grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
|
||||||
return F.depend(loss1, self.optimizer(grads)), loss2, loss3, loss4, loss5, loss6, loss7
|
return F.depend(loss, self.optimizer(grads))
|
||||||
|
|
|
@ -123,10 +123,10 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
net_with_loss = WithLossCell(net, loss)
|
net_with_loss = WithLossCell(net, loss)
|
||||||
if args_opt.run_distribute:
|
if args_opt.run_distribute:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True,
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale, reduce_flag=True,
|
||||||
mean=True, degree=device_num)
|
mean=True, degree=device_num)
|
||||||
else:
|
else:
|
||||||
net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale)
|
net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale)
|
||||||
|
|
||||||
time_cb = TimeMonitor(data_size=dataset_size)
|
time_cb = TimeMonitor(data_size=dataset_size)
|
||||||
loss_cb = LossCallBack(rank_id=rank)
|
loss_cb = LossCallBack(rank_id=rank)
|
||||||
|
|
Loading…
Reference in New Issue