remove sens parameter

Jiaqi 2020-09-04 11:39:59 +08:00
parent a26fdb83ee
commit 94d63b90f4
7 changed files with 77 additions and 73 deletions

View File

@@ -88,7 +88,7 @@ class WithGradCell(Cell):
     Run in PyNative mode.

     Args:
-        network (Cell): The target network to wrap.
+        network (Cell): The target network to wrap. The network only supports single output.
         loss_fn (Cell): Primitive loss function used to compute gradients. Default: None.
         sens (Union[None, Tensor, Scalar, Tuple ...]): The sensitive for backpropagation, the type and shape
             should be same as the `network` output. If None, we will fill one to a same type shape of
@@ -143,7 +143,7 @@ class TrainOneStepCell(Cell):
     parallel modes are available for training.

     Args:
-        network (Cell): The training network.
+        network (Cell): The training network. The network only supports single output.
         optimizer (Cell): Optimizer for updating the weights.
         sens (Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0.
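Both docstrings now state the single-output constraint explicitly: the wrapped network must return exactly one loss tensor, which is what WithLossCell produces. A minimal usage sketch, not part of the diff; `backbone`, `loss_fn`, `optimizer`, `inputs` and `label` are placeholder objects built in the usual way:

    from mindspore import nn

    net_with_loss = nn.WithLossCell(backbone, loss_fn)      # single scalar loss output
    train_network = nn.TrainOneStepCell(net_with_loss, optimizer, sens=1024.0)
    train_network.set_train()
    loss = train_network(inputs, label)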

View File

@@ -49,6 +49,7 @@ grad_overflow = P.FloatStatus()
 def _tensor_grad_overflow(grad):
     return grad_overflow(grad)

+
 class DynamicLossScaleUpdateCell(Cell):
     r"""
     Dynamic Loss scale update cell.
@@ -168,27 +169,26 @@ class TrainOneStepWithLossScaleCell(Cell):
     This is a training step with loss scaling. It takes a network, an optimizer and possibly a scale update
     Cell as args. The loss scale value can be updated in both host side or device side. The
-    TrainOneStepWithLossScaleCell will be compiled to be graph which takes `data`, `label`, `sens` as input
-    data. The `sens` is acting as loss scaling value. If you want to update it on host side, the value should
-    be provided. If `sens` is not given, the loss scale update logic should be provied by `scale_update_cell`.
-    If `scale_update_cell` is not None and `sens` is provided, the `scale_update_cell` will be ignored.
+    TrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
+    A Tensor typed `scale_sense` acts as the loss scaling value. If you want to update it on the host side,
+    the value should be provided. If a Tensor typed `scale_sense` is not given, the loss scale update logic
+    should be provided by a Cell typed `scale_sense`. If `scale_sense` is given as a Tensor, the Cell-based
+    update logic is not used.

     Args:
-        network (Cell): The training network.
+        network (Cell): The training network. The network only supports single output.
         optimizer (Cell): Optimizer for updating the weights.
-        scale_update_cell(Cell): The loss scaling update logic cell. Default: None.
+        scale_sense (Union[Tensor, Cell]): If this value is a Cell, it is the loss scaling update logic cell.
+            If this value is a Tensor, it is a Tensor with shape :math:`()`. Default: None.

     Inputs:
-        - **inputs** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
-        - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
-        - **scaling_sens** (Tensor) - Tensor of shape :math:`()`.
+        - **(*inputs)** (Tuple(Tensor)) - Tuple of input tensors with shape :math:`(N, \ldots)`.

     Outputs:
         Tuple of 3 Tensor, the loss, overflow flag and current loss scaling value.

         - **loss** (Tensor) - Tensor with shape :math:`()`.
         - **overflow** (Tensor) - Tensor with shape :math:`()`, type is bool.
         - **loss_scale** (Tensor) - Tensor with shape :math:`()`.

     Examples:
         >>> net_with_loss = Net()
@@ -203,7 +203,7 @@ class TrainOneStepWithLossScaleCell(Cell):
         >>> output = train_network(inputs, label, scaling_sens)
     """

-    def __init__(self, network, optimizer, scale_update_cell=None):
+    def __init__(self, network, optimizer, scale_sense=None):
         super(TrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
         self.network = network
         self.network.set_grad()
@@ -236,29 +236,29 @@ class TrainOneStepWithLossScaleCell(Cell):
             self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
         self.is_distributed = self.parallel_mode != ParallelMode.STAND_ALONE

-        self.loss_scale = None
-        self.loss_scaling_manager = scale_update_cell
-        if scale_update_cell:
-            self.loss_scale = Parameter(Tensor(scale_update_cell.get_loss_scale(), dtype=mstype.float32),
-                                        name="loss_scale")
+        self.scale_sense = None
+        self.loss_scaling_manager = None
+        if isinstance(scale_sense, Cell):
+            self.loss_scaling_manager = scale_sense
+            self.scale_sense = Parameter(Tensor(scale_sense.get_loss_scale(), dtype=mstype.float32),
+                                         name="scale_sense")
+        if isinstance(scale_sense, Tensor):
+            self.scale_sense = Parameter(scale_sense, name='scale_sense')

     @C.add_flags(has_effect=True)
-    def construct(self, data, label, sens=None):
+    def construct(self, *inputs):
         weights = self.weights
-        loss = self.network(data, label)
+        loss = self.network(*inputs)
         init = False
         if not self.gpu_target:
             # init overflow buffer
             init = self.alloc_status()
             # clear overflow buffer
             self.clear_status(init)
-        if sens is None:
-            scaling_sens = self.loss_scale
-        else:
-            scaling_sens = sens
+        scaling_sens = self.scale_sense
         scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
-        grads = self.grad(self.network, weights)(data, label, scaling_sens_filled)
+        grads = self.grad(self.network, weights)(*inputs, scaling_sens_filled)
         grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)
@@ -279,8 +279,8 @@ class TrainOneStepWithLossScaleCell(Cell):
         else:
             cond = self.less_equal(self.base, flag_sum)
         overflow = cond
-        if sens is None:
-            overflow = self.loss_scaling_manager(self.loss_scale, cond)
+        if self.loss_scaling_manager is not None:
+            overflow = self.loss_scaling_manager(self.scale_sense, cond)
         # if there is no overflow, do optimize
         if overflow:
             opt = False
@@ -288,3 +288,9 @@ class TrainOneStepWithLossScaleCell(Cell):
             opt = self.optimizer(grads)
         ret = (loss, cond, scaling_sens)
         return F.depend(ret, opt)
+
+    def set_sense_scale(self, sens):
+        """If the user has set the sens value in the training process and wants to reassign it, this
+        function can be called to modify the value; `sens` must be of type Tensor."""
+        if self.scale_sense and isinstance(sens, Tensor):
+            self.scale_sense.set_data(sens)
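With this change the loss scale is bound at construction time instead of being passed on every call. A minimal usage sketch, not part of the diff, showing the two accepted forms of `scale_sense`; `net_with_loss`, `optimizer`, `inputs` and `label` are placeholders built as in the tests further down:

    import numpy as np
    from mindspore import Tensor, nn
    from mindspore.common import dtype as mstype
    from mindspore.train.loss_scale_manager import DynamicLossScaleManager

    # Tensor form: a fixed scale value, which can later be reassigned via set_sense_scale.
    train_network = nn.TrainOneStepWithLossScaleCell(
        net_with_loss, optimizer,
        scale_sense=Tensor(np.full((1), 2.0 ** 12), dtype=mstype.float32))
    train_network.set_sense_scale(Tensor(np.full((1), 2.0 ** 10), dtype=mstype.float32))

    # Cell form: the update cell carries the loss scale update logic.
    update_cell = DynamicLossScaleManager().get_update_cell()
    train_network = nn.TrainOneStepWithLossScaleCell(
        net_with_loss, optimizer, scale_sense=update_cell)

    loss, overflow, scale = train_network(inputs, label)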

View File

@@ -182,7 +182,7 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', **kwargs):
                              "are supported in current version. If you use `O2` option, please"
                              "use `loss_scale_manager=None` or `FixedLossScaleManager`")
         network = nn.TrainOneStepWithLossScaleCell(network, optimizer,
-                                                   scale_update_cell=update_cell).set_train()
+                                                   scale_sense=update_cell).set_train()
         return network
     network = nn.TrainOneStepCell(network, optimizer, loss_scale).set_train()
     return network
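On the amp path only the keyword changes: build_train_network still forwards the manager's update cell, now as `scale_sense`. A hedged example of reaching this branch, assuming a manager whose get_drop_overflow_update() returns True; `net`, `optimizer` and `loss_fn` are placeholders:

    from mindspore.train import amp
    from mindspore.train.loss_scale_manager import DynamicLossScaleManager

    train_network = amp.build_train_network(net, optimizer, loss_fn, level='O0',
                                            loss_scale_manager=DynamicLossScaleManager())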

View File

@@ -34,7 +34,6 @@ from ..nn.wrap.cell_wrapper import _VirtualDatasetCell
 from ..context import ParallelMode
 from ..parallel._utils import _need_to_full, _to_full_tensor
 from ..parallel._cost_model_context import _set_multi_subgraphs
-from ..common import dtype as mstype
 from .dataset_helper import DatasetHelper, connect_network_with_dataset
 from . import amp
@@ -489,11 +488,6 @@ class Model:
                                      "return two elements, but got {}".format(len_element))
                 cb_params.cur_step_num += 1

-                overflow = False
-                if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
-                    scaling_sens = self._get_scaling_sens()
-                    next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)
-
                 cb_params.train_dataset_element = next_element
                 list_callback.step_begin(run_context)
                 outputs = self._train_network(*next_element)

View File

@@ -148,7 +148,6 @@ class MSELoss(nn.Cell):

 def test_loss_scale_fp16_lr_overflow():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     lr = Tensor(np.ones([1], np.float32) * 0.1)
     net = NetFP16(16, 16)
     net.set_train()
@@ -157,9 +156,11 @@ def test_loss_scale_fp16_lr_overflow():
     optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
-    output_1 = train_network(inputs, label, scaling_sens)
-    output_2 = train_network(inputs, label, scaling_sens)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), np.finfo(np.float32).max),
+                                                                     dtype=mstype.float32))
+    output_1 = train_network(inputs, label)
+    output_2 = train_network(inputs, label)
     assert output_1[0].asnumpy() == output_2[0].asnumpy()
     assert output_1[1].asnumpy() == output_2[1].asnumpy() == True
@@ -188,16 +189,17 @@ def test_loss_scale_fp16_model_train_overflow():

 def test_loss_scale_fp16_opt_rmsprop_overflow():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full(1, np.finfo(np.float32).max), dtype=mstype.float32)
     net = NetFP16(16, 16)
     net.set_train()
     loss = MSELoss()
     optimizer = RMSProp(net.trainable_params(), learning_rate=0.1)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
-    output_1 = train_network(inputs, label, scaling_sens)
-    output_2 = train_network(inputs, label, scaling_sens)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full(1, np.finfo(np.float32).max),
+                                                                     dtype=mstype.float32))
+    output_1 = train_network(inputs, label)
+    output_2 = train_network(inputs, label)
     assert output_1[0].asnumpy() == output_2[0].asnumpy()
     assert output_1[1].asnumpy() == output_2[1].asnumpy() == True
@@ -208,7 +210,6 @@ def test_loss_scale_fp16_opt_rmsprop_overflow():

 def test_loss_scale_fp16_overflow():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     net = NetFP16(16, 16)
     net.set_train()
@@ -216,8 +217,10 @@ def test_loss_scale_fp16_overflow():
     optimizer = Lamb(net.trainable_params(), learning_rate=0.01)
     net_with_loss = WithLossCell(net, loss)
     net_with_loss.set_grad()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
-    output_1 = train_network(inputs, label, scaling_sens)
-    output_2 = train_network(inputs, label, scaling_sens)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), np.finfo(np.float32).max),
+                                                                     dtype=mstype.float32))
+    output_1 = train_network(inputs, label)
+    output_2 = train_network(inputs, label)
     assert output_1[0].asnumpy() == output_2[0].asnumpy()
     assert output_1[1].asnumpy() == output_2[1].asnumpy() == True

View File

@@ -177,7 +177,7 @@ def test_compile_grad_error():
     net_with_loss = WithLossCell(net, loss)
     scale_manager = DynamicLossScaleManager()
     update_cell = scale_manager.get_update_cell()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=update_cell)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=update_cell)
     train_network.set_train()
     with pytest.raises(TypeError) as e:
         train_network(inputs, label)

View File

@@ -100,70 +100,71 @@ class MSELoss(nn.Cell):
 def test_momentum_compile():
     inputs = Tensor(np.ones([15, 1]).astype(np.float32))
     label = Tensor(np.zeros([15, 1]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), 1.0), dtype=mstype.float32)
     net = Net(1, 1)
     loss = MSELoss()
     optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), 1.0), dtype=mstype.float32))
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)

 def test_compile_fp16_not_overflow():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), 1.0), dtype=mstype.float32)
     net = NetFP16(16, 16)
     loss = MSELoss()
     optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), 1.0), dtype=mstype.float32))
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)

 def test_compile_fp16_lr_overflow():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     lr = Tensor(np.ones([1], np.float32) * 0.1)
     net = NetFP16(16, 16)
     loss = MSELoss()
     optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), np.finfo(np.float32).max),
+                                                                     dtype=mstype.float32))
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)

 def test_compile_fp16_overflow():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     net = NetFP16(16, 16)
     loss = MSELoss()
     optimizer = Lamb(net.trainable_params(), learning_rate=0.01)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), np.finfo(np.float32).max),
+                                                                     dtype=mstype.float32))
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)

 def test_compile_fp16_lr_overflow_with_lossscale_update():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     lr = Tensor(np.ones([1], np.float32) * 0.1)
     net = NetFP16(16, 16)
     loss = MSELoss()

@@ -172,9 +173,9 @@ def test_compile_fp16_lr_overflow_with_lossscale_update():
     net_with_loss = WithLossCell(net, loss)
     scale_manager = DynamicLossScaleManager()
     manager = scale_manager.get_update_cell()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=manager)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)
@@ -209,7 +210,6 @@ def test_compile_f16_model_train_fixed():

 def test_compile_fp16_lr_overflow_fixed_feed():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     lr = Tensor(np.ones([1], np.float32) * 0.1)
     net = NetFP16(16, 16)
     loss = MSELoss()
@@ -218,16 +218,15 @@ def test_compile_fp16_lr_overflow_fixed_feed():
     net_with_loss = WithLossCell(net, loss)
     scale_manager = FixedLossScaleManager()
     update_cell = scale_manager.get_update_cell()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=update_cell)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=update_cell)
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)


 def test_compile_fp16_lr_overflow_dynamic_feed():
     inputs = Tensor(np.ones([16, 16]).astype(np.float32))
     label = Tensor(np.zeros([16, 16]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
     lr = Tensor(np.ones([1], np.float32) * 0.1)
     net = NetFP16(16, 16)
     loss = MSELoss()
@@ -236,9 +235,9 @@ def test_compile_fp16_lr_overflow_dynamic_feed():
     net_with_loss = WithLossCell(net, loss)
     scale_manager = DynamicLossScaleManager()
     update_cell = scale_manager.get_update_cell()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=update_cell)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=update_cell)
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)
@@ -253,7 +252,7 @@ def test_compile_fp16_lr_overflow_fixed_graph():
     net_with_loss = WithLossCell(net, loss)
     scale_manager = FixedLossScaleManager(drop_overflow_update=True)
     update_cell = scale_manager.get_update_cell()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=update_cell)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=update_cell)
     train_network.set_train()
     output = train_network(inputs, label)
     print("the result is ", output)
@@ -270,7 +269,7 @@ def test_compile_fp16_lr_overflow_dynamic_graph():
     net_with_loss = WithLossCell(net, loss)
     scale_manager = DynamicLossScaleManager()
     update_cell = scale_manager.get_update_cell()
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_update_cell=update_cell)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=update_cell)
     train_network.set_train()
     output = train_network(inputs, label)
     print("the result is ", output)
@@ -279,7 +278,6 @@ def test_compile_fp16_lr_overflow_dynamic_graph():

 def adam_compile(loss_scale=1.0):
     inputs = Tensor(np.ones([15, 1]).astype(np.float32))
     label = Tensor(np.zeros([15, 1]).astype(np.float32))
-    scaling_sens = Tensor(np.full((1), 1.0), dtype=mstype.float32)
     net = Net(1, 1)
     loss = MSELoss()
@@ -287,14 +285,17 @@ def adam_compile(loss_scale=1.0):
                      use_nesterov=False, weight_decay=0.0, loss_scale=loss_scale)
     net_with_loss = WithLossCell(net, loss)
-    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer)
+    train_network = TrainOneStepWithLossScaleCell(net_with_loss, optimizer,
+                                                  scale_sense=Tensor(np.full((1), 1.0), dtype=mstype.float32))
     train_network.set_train()
-    output = train_network(inputs, label, scaling_sens)
+    output = train_network(inputs, label)
     print("the result is ", output)


 def test_adam_compile():
     adam_compile()


 def test_adam_loss_scale_compile():
     """ test setting loss_scale to 1e-40 """
     adam_compile(loss_scale=1e-40)