forked from mindspore-Ecosystem/mindspore
[Boost]Add MindBoost.
This commit is contained in:
parent
166ad9e3a7
commit
82b88ade65
|
@ -277,6 +277,7 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
|
||||
${CMAKE_SOURCE_DIR}/mindspore/numpy
|
||||
${CMAKE_SOURCE_DIR}/mindspore/train
|
||||
${CMAKE_SOURCE_DIR}/mindspore/boost
|
||||
${CMAKE_SOURCE_DIR}/mindspore/common
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/communication
|
||||
|
|
|
@ -175,6 +175,7 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
|
||||
${CMAKE_SOURCE_DIR}/mindspore/numpy
|
||||
${CMAKE_SOURCE_DIR}/mindspore/train
|
||||
${CMAKE_SOURCE_DIR}/mindspore/boost
|
||||
${CMAKE_SOURCE_DIR}/mindspore/common
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/communication
|
||||
|
|
|
@ -13,22 +13,22 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Accelerating.
|
||||
MindBoost(Beta Feature)
|
||||
|
||||
Provide automatic acceleration for networks, such as Less BN and Gradient Freeze.
|
||||
"""
|
||||
from .acc import *
|
||||
from .boost import *
|
||||
from .base import *
|
||||
from .acc_cell_wrapper import *
|
||||
from .boost_cell_wrapper import *
|
||||
from .less_batch_normalization import *
|
||||
from .grad_freeze import *
|
||||
from .grad_accumulation import *
|
||||
from .adasum import *
|
||||
|
||||
|
||||
__all__ = ['AutoAcc',
|
||||
__all__ = ['AutoBoost',
|
||||
'OptimizerProcess', 'ParameterProcess',
|
||||
'AccTrainOneStepCell', 'AccTrainOneStepWithLossScaleCell',
|
||||
'BoostTrainOneStepCell', 'BoostTrainOneStepWithLossScaleCell',
|
||||
'LessBN',
|
||||
'GradientFreeze', 'FreezeOpt', 'freeze_cell',
|
||||
'GradientAccumulation',
|
|
@ -26,10 +26,34 @@ __all__ = ["OptimizerProcess", "ParameterProcess"]
|
|||
|
||||
class OptimizerProcess:
|
||||
"""
|
||||
Process optimizer for ACC.
|
||||
Process optimizer for Boost. Currently, this class supports adding GC(grad centralization) tags
|
||||
and creating new optimizers.
|
||||
|
||||
Args:
|
||||
opt (Cell): Optimizer used.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore import Tensor, Parameter, nn
|
||||
>>> import mindspore.ops as ops
|
||||
>>> from mindspore.boost import OptimizerProcess
|
||||
>>>
|
||||
>>> class Net(nn.Cell):
|
||||
... def __init__(self, in_features, out_features):
|
||||
... super(Net, self).__init__()
|
||||
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
|
||||
... name='weight')
|
||||
... self.matmul = ops.MatMul()
|
||||
...
|
||||
... def construct(self, x):
|
||||
... output = self.matmul(x, self.weight)
|
||||
... return output
|
||||
...
|
||||
>>> size, in_features, out_features = 16, 16, 10
|
||||
>>> network = Net(in_features, out_features)
|
||||
>>> optimizer = nn.Momentum(network.trainable_params(), learning_rate=0.1, momentum=0.9)
|
||||
>>> optimizer_process = OptimizerProcess(optimizer)
|
||||
>>> optimizer_process.add_grad_centralization(network)
|
||||
>>> optimizer = optimizer_process.generate_new_optimizer()
|
||||
"""
|
||||
def __init__(self, opt):
|
||||
if isinstance(opt, LARS):
|
||||
|
@ -113,7 +137,34 @@ class OptimizerProcess:
|
|||
|
||||
class ParameterProcess:
|
||||
"""
|
||||
Process parameter for ACC.
|
||||
Process parameter for Boost. Currently, this class supports creating group parameters
|
||||
and automatically setting gradient segmentation point.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore import Tensor, Parameter, nn
|
||||
>>> import mindspore.ops as ops
|
||||
>>> from mindspore.boost import ParameterProcess
|
||||
>>>
|
||||
>>> class Net(nn.Cell):
|
||||
... def __init__(self, in_features, out_features):
|
||||
... super(Net, self).__init__()
|
||||
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
|
||||
... name='weight')
|
||||
... self.weight2 = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
|
||||
... name='weight2')
|
||||
... self.matmul = ops.MatMul()
|
||||
... self.matmul2 = ops.MatMul()
|
||||
...
|
||||
... def construct(self, x):
|
||||
... output = self.matmul(x, self.weight)
|
||||
... output2 = self.matmul2(x, self.weight2)
|
||||
... return output + output2
|
||||
...
|
||||
>>> size, in_features, out_features = 16, 16, 10
|
||||
>>> network = Net(in_features, out_features)
|
||||
>>> new_parameter = network.trainable_params()[:1]
|
||||
>>> parameter_process = ParameterProcess()
|
||||
>>> group_params = parameter_process.generate_group_params(new_parameter, network.trainable_params())
|
||||
"""
|
||||
def __init__(self):
|
||||
self._parameter_indices = 1
|
|
@ -12,16 +12,16 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""acc"""
|
||||
"""boost"""
|
||||
from .less_batch_normalization import LessBN
|
||||
from .grad_freeze import GradientFreeze
|
||||
from .base import OptimizerProcess, ParameterProcess
|
||||
|
||||
|
||||
__all__ = ["AutoAcc"]
|
||||
__all__ = ["AutoBoost"]
|
||||
|
||||
|
||||
_acc_config_level = {
|
||||
_boost_config_level = {
|
||||
"O0": {
|
||||
"less_bn": False,
|
||||
"grad_freeze": False,
|
||||
|
@ -36,19 +36,19 @@ _acc_config_level = {
|
|||
"adasum": True}}
|
||||
|
||||
|
||||
class AutoAcc:
|
||||
class AutoBoost:
|
||||
"""
|
||||
Provide auto accelerating for network.
|
||||
|
||||
Args:
|
||||
level (Str): acc config level.
|
||||
level (Str): boost config level.
|
||||
"""
|
||||
def __init__(self, level, kwargs):
|
||||
if level not in _acc_config_level.keys():
|
||||
if level not in _boost_config_level.keys():
|
||||
level = 'O0'
|
||||
self.level = level
|
||||
acc_config = _acc_config_level[level]
|
||||
self._acc_config = acc_config
|
||||
boost_config = _boost_config_level[level]
|
||||
self._boost_config = boost_config
|
||||
self._fn_flag = True
|
||||
self._gc_flag = True
|
||||
self._param_groups = 10
|
||||
|
@ -62,13 +62,13 @@ class AutoAcc:
|
|||
def _get_configuration(self, kwargs):
|
||||
"""Get configuration."""
|
||||
for key, val in kwargs.items():
|
||||
if key not in self._acc_config_func_map.keys():
|
||||
if key not in self._boost_config_func_map.keys():
|
||||
continue
|
||||
self._acc_config_func_map[key](self, val)
|
||||
self._boost_config_func_map[key](self, val)
|
||||
|
||||
def network_auto_process_train(self, network, optimizer):
|
||||
"""Network train."""
|
||||
if self._acc_config["less_bn"]:
|
||||
if self._boost_config["less_bn"]:
|
||||
network = LessBN(network, fn_flag=self._fn_flag)
|
||||
optimizer_process = OptimizerProcess(optimizer)
|
||||
group_params = self._param_processer.assign_parameter_group(network.trainable_params(),
|
||||
|
@ -79,18 +79,18 @@ class AutoAcc:
|
|||
optimizer_process.add_grad_centralization(network)
|
||||
optimizer = optimizer_process.generate_new_optimizer()
|
||||
|
||||
if self._acc_config["grad_freeze"]:
|
||||
if self._boost_config["grad_freeze"]:
|
||||
freeze_processer = GradientFreeze(self._param_groups, self._freeze_type,
|
||||
self._freeze_p, self._total_steps)
|
||||
network, optimizer = freeze_processer.freeze_generate(network, optimizer)
|
||||
|
||||
if self._acc_config["adasum"]:
|
||||
if self._boost_config["adasum"]:
|
||||
setattr(optimizer, "adasum", True)
|
||||
return network, optimizer
|
||||
|
||||
def network_auto_process_eval(self, network):
|
||||
"""Network eval."""
|
||||
if self._acc_config["less_bn"]:
|
||||
if self._boost_config["less_bn"]:
|
||||
network = LessBN(network)
|
||||
|
||||
return network
|
||||
|
@ -120,7 +120,7 @@ class AutoAcc:
|
|||
gradient_groups = list(gradient_groups)
|
||||
self._gradient_groups = gradient_groups
|
||||
|
||||
_acc_config_func_map = {
|
||||
_boost_config_func_map = {
|
||||
"fn_flag": set_fn_flag,
|
||||
"gc_flag": set_gc_flag,
|
||||
"param_groups": set_param_groups,
|
|
@ -12,7 +12,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Acc Mode Cell Wrapper."""
|
||||
"""Boost Mode Cell Wrapper."""
|
||||
from mindspore.nn.wrap import TrainOneStepCell
|
||||
import mindspore.context as context
|
||||
from mindspore.context import ParallelMode, get_auto_parallel_context
|
||||
|
@ -31,7 +31,7 @@ from .adasum import AdaSum
|
|||
from .grad_accumulation import gradient_accumulation_op, gradient_clear_op
|
||||
|
||||
|
||||
__all__ = ["AccTrainOneStepCell", "AccTrainOneStepWithLossScaleCell"]
|
||||
__all__ = ["BoostTrainOneStepCell", "BoostTrainOneStepWithLossScaleCell"]
|
||||
|
||||
|
||||
_get_delta_weight = C.MultitypeFuncGraph("_get_delta_weight")
|
||||
|
@ -51,9 +51,9 @@ def _save_weight_process(new_parameter, old_parameter):
|
|||
return P.Assign()(new_parameter, old_parameter)
|
||||
|
||||
|
||||
class AccTrainOneStepCell(TrainOneStepCell):
|
||||
class BoostTrainOneStepCell(TrainOneStepCell):
|
||||
r"""
|
||||
Acc Network training package class.
|
||||
Boost Network training package class.
|
||||
|
||||
Wraps the network with an optimizer. The resulting Cell is trained with input '\*inputs'.
|
||||
The backward graph will be created in the construct function to update the parameter. Different
|
||||
|
@ -82,29 +82,29 @@ class AccTrainOneStepCell(TrainOneStepCell):
|
|||
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
|
||||
>>> #1) Using the WithLossCell existing provide
|
||||
>>> loss_net = nn.WithLossCell(net, loss_fn)
|
||||
>>> train_net = nn.acc.AccTrainOneStepCell(loss_net, optim)
|
||||
>>> train_net = boost.BoostTrainOneStepCell(loss_net, optim)
|
||||
>>>
|
||||
>>> #2) Using user-defined WithLossCell
|
||||
>>> class MyWithLossCell(Cell):
|
||||
mindspore. def __init__(self, backbone, loss_fn):
|
||||
mindspore. super(MyWithLossCell, self).__init__(auto_prefix=False)
|
||||
mindspore. self._backbone = backbone
|
||||
mindspore. self._loss_fn = loss_fn
|
||||
mindspore.
|
||||
mindspore. def construct(self, x, y, label):
|
||||
mindspore. out = self._backbone(x, y)
|
||||
mindspore. return self._loss_fn(out, label)
|
||||
mindspore.
|
||||
mindspore. @property
|
||||
mindspore. def backbone_network(self):
|
||||
mindspore. return self._backbone
|
||||
mindspore.
|
||||
... def __init__(self, backbone, loss_fn):
|
||||
... super(MyWithLossCell, self).__init__(auto_prefix=False)
|
||||
... self._backbone = backbone
|
||||
... self._loss_fn = loss_fn
|
||||
...
|
||||
... def construct(self, x, y, label):
|
||||
... out = self._backbone(x, y)
|
||||
... return self._loss_fn(out, label)
|
||||
...
|
||||
... @property
|
||||
... def backbone_network(self):
|
||||
... return self._backbone
|
||||
...
|
||||
>>> loss_net = MyWithLossCell(net, loss_fn)
|
||||
>>> train_net = nn.acc.AccTrainOneStepCellTrainOneStepCell(loss_net, optim)
|
||||
>>> train_net = boost.BoostTrainOneStepCell(loss_net, optim)
|
||||
"""
|
||||
|
||||
def __init__(self, network, optimizer, sens=1.0):
|
||||
super(AccTrainOneStepCell, self).__init__(network, optimizer, sens)
|
||||
super(BoostTrainOneStepCell, self).__init__(network, optimizer, sens)
|
||||
self.hyper_map = C.HyperMap()
|
||||
self.freeze = isinstance(optimizer, FreezeOpt)
|
||||
if not self.freeze:
|
||||
|
@ -240,13 +240,13 @@ class AccTrainOneStepCell(TrainOneStepCell):
|
|||
return is_enable
|
||||
|
||||
|
||||
class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
|
||||
class BoostTrainOneStepWithLossScaleCell(BoostTrainOneStepCell):
|
||||
r"""
|
||||
Acc Network training with loss scaling.
|
||||
Boost Network training with loss scaling.
|
||||
|
||||
This is a training step with loss scaling. It takes a network, an optimizer and possibly a scale update
|
||||
Cell as args. The loss scale value can be updated in both host side or device side. The
|
||||
AccTrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
|
||||
BoostTrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
|
||||
The Tensor type of `scale_sense` is acting as loss scaling value. If you want to update it on host side,
|
||||
the value must be provided. If the Tensor type of `scale_sense` is not given, the loss scale update logic
|
||||
must be provided by Cell type of `scale_sense`.
|
||||
|
@ -282,16 +282,16 @@ class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
|
|||
>>> from mindspore.common import dtype as mstype
|
||||
>>>
|
||||
>>> class Net(nn.Cell):
|
||||
mindspore. def __init__(self, in_features, out_features):
|
||||
mindspore. super(Net, self).__init__()
|
||||
mindspore. self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
|
||||
mindspore. name='weight')
|
||||
mindspore. self.matmul = P.MatMul()
|
||||
mindspore.
|
||||
mindspore. def construct(self, x):
|
||||
mindspore. output = self.matmul(x, self.weight)
|
||||
mindspore. return output
|
||||
mindspore.
|
||||
... def __init__(self, in_features, out_features):
|
||||
... super(Net, self).__init__()
|
||||
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
|
||||
... name='weight')
|
||||
... self.matmul = ops.MatMul()
|
||||
...
|
||||
... def construct(self, x):
|
||||
... output = self.matmul(x, self.weight)
|
||||
... return output
|
||||
...
|
||||
>>> size, in_features, out_features = 16, 16, 10
|
||||
>>> #1) when the type of scale_sense is Cell:
|
||||
>>> net = Net(in_features, out_features)
|
||||
|
@ -299,7 +299,7 @@ class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
|
|||
>>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
|
||||
>>> net_with_loss = WithLossCell(net, loss)
|
||||
>>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)
|
||||
>>> train_network = nn.acc.AccTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
|
||||
>>> train_network = boost.BoostTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
|
||||
>>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32)
|
||||
>>> labels = Tensor(np.ones([out_features,]), mindspore.float32)
|
||||
>>> output = train_network(input, labels)
|
||||
|
@ -312,11 +312,11 @@ class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
|
|||
>>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32))
|
||||
>>> label = Tensor(np.zeros([size, out_features]).astype(np.float32))
|
||||
>>> scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
|
||||
>>> train_network = nn.acc.AccTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=scaling_sens)
|
||||
>>> train_network = boost.BoostTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=scaling_sens)
|
||||
>>> output = train_network(inputs, label)
|
||||
"""
|
||||
def __init__(self, network, optimizer, scale_sense):
|
||||
super(AccTrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None)
|
||||
super(BoostTrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None)
|
||||
self.base = Tensor(1, mstype.float32)
|
||||
self.reduce_sum = P.ReduceSum(keep_dims=False)
|
||||
self.less_equal = P.LessEqual()
|
|
@ -91,13 +91,13 @@ class LessBN(Cell):
|
|||
network (Cell): Network to be modified.
|
||||
|
||||
Examples:
|
||||
>>> network = acc.LessBN(network)
|
||||
>>> network = boost.LessBN(network)
|
||||
"""
|
||||
|
||||
def __init__(self, network, fn_flag=False):
|
||||
super(LessBN, self).__init__()
|
||||
self.network = network
|
||||
self.network.set_acc("less_bn")
|
||||
self.network.set_boost("less_bn")
|
||||
self.network.update_cell_prefix()
|
||||
if fn_flag:
|
||||
self._convert_to_less_bn_net(self.network)
|
|
@ -1145,29 +1145,29 @@ class Cell(Cell_):
|
|||
self._add_init_args(**flags)
|
||||
return self
|
||||
|
||||
def set_acc(self, acc_type):
|
||||
def set_boost(self, boost_type):
|
||||
"""
|
||||
In order to improve the network performance, configure the network auto enable to
|
||||
accelerate the algorithm in the algorithm library.
|
||||
|
||||
If `acc_type is not in the algorithm library`, Please view the algorithm in the algorithm library
|
||||
If `boost_type` is not in the algorithm library, please view the algorithm in the algorithm library
|
||||
through `algorithm library`.
|
||||
|
||||
Note:
|
||||
Some acceleration algorithms may affect the accuracy of the network, please choose carefully.
|
||||
|
||||
Args:
|
||||
acc_type (str): accelerate algorithm.
|
||||
boost_type (str): accelerate algorithm.
|
||||
|
||||
Returns:
|
||||
Cell, the cell itself.
|
||||
|
||||
Raises:
|
||||
ValueError: If acc_type is not in the algorithm library.
|
||||
ValueError: If boost_type is not in the algorithm library.
|
||||
"""
|
||||
if acc_type not in ("less_bn",):
|
||||
raise ValueError("The acc_type is not in the algorithm library.")
|
||||
flags = {"less_bn": acc_type == "less_bn"}
|
||||
if boost_type not in ("less_bn",):
|
||||
raise ValueError("The boost_type is not in the algorithm library.")
|
||||
flags = {"less_bn": boost_type == "less_bn"}
|
||||
self.add_flags_recursive(**flags)
|
||||
return self
|
||||
|
||||
|
|
|
@ -17,13 +17,13 @@ from .. import nn
|
|||
from .._checkparam import Validator as validator
|
||||
from .._checkparam import Rel
|
||||
from ..common import dtype as mstype
|
||||
from ..nn import acc
|
||||
from ..nn.wrap.cell_wrapper import _VirtualDatasetCell, _TrainPipelineAccuStepCell
|
||||
from ..nn.wrap.loss_scale import _TrainPipelineWithLossScaleCell
|
||||
from ..ops import functional as F
|
||||
from ..parallel._utils import _get_parallel_mode, _get_pipeline_stages
|
||||
from .loss_scale_manager import DynamicLossScaleManager, LossScaleManager
|
||||
from ..context import ParallelMode
|
||||
from .. import boost
|
||||
from .. import context
|
||||
|
||||
|
||||
|
@ -111,7 +111,7 @@ def _add_loss_network(network, loss_fn, cast_model_type):
|
|||
return network
|
||||
|
||||
|
||||
def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level='O0', **kwargs):
|
||||
def build_train_network(network, optimizer, loss_fn=None, level='O0', boost_level='O0', **kwargs):
|
||||
"""
|
||||
Build the mixed precision training cell automatically.
|
||||
|
||||
|
@ -147,9 +147,9 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
|
|||
(with property `drop_overflow_update=False` ), or a `ValueError` exception will be raised.
|
||||
"""
|
||||
validator.check_value_type('network', network, nn.Cell)
|
||||
validator.check_value_type('optimizer', optimizer, (nn.Optimizer, acc.FreezeOpt))
|
||||
validator.check_value_type('optimizer', optimizer, (nn.Optimizer, boost.FreezeOpt))
|
||||
validator.check('level', level, "", ['O0', 'O2', 'O3', "auto"], Rel.IN)
|
||||
validator.check('acc_level', acc_level, "", ['O0', 'O1', 'O2'], Rel.IN)
|
||||
validator.check('boost_level', boost_level, "", ['O0', 'O1', 'O2'], Rel.IN)
|
||||
|
||||
if level == "auto":
|
||||
device_target = context.get_context('device_target')
|
||||
|
@ -175,9 +175,9 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
|
|||
if _get_parallel_mode() in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
|
||||
network = _VirtualDatasetCell(network)
|
||||
|
||||
enable_acc = False
|
||||
if acc_level in ["O1", "O2"]:
|
||||
enable_acc = True
|
||||
enable_boost = False
|
||||
if boost_level in ["O1", "O2"]:
|
||||
enable_boost = True
|
||||
|
||||
loss_scale = 1.0
|
||||
if config["loss_scale_manager"] is not None:
|
||||
|
@ -193,17 +193,17 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
|
|||
if _get_pipeline_stages() > 1:
|
||||
network = _TrainPipelineWithLossScaleCell(network, optimizer,
|
||||
scale_sense=update_cell).set_train()
|
||||
elif enable_acc:
|
||||
network = acc.AccTrainOneStepWithLossScaleCell(network, optimizer,
|
||||
scale_sense=update_cell).set_train()
|
||||
elif enable_boost:
|
||||
network = boost.BoostTrainOneStepWithLossScaleCell(network, optimizer,
|
||||
scale_sense=update_cell).set_train()
|
||||
else:
|
||||
network = nn.TrainOneStepWithLossScaleCell(network, optimizer,
|
||||
scale_sense=update_cell).set_train()
|
||||
return network
|
||||
if _get_pipeline_stages() > 1:
|
||||
network = _TrainPipelineAccuStepCell(network, optimizer).set_train()
|
||||
elif enable_acc:
|
||||
network = acc.AccTrainOneStepCell(network, optimizer, loss_scale).set_train()
|
||||
elif enable_boost:
|
||||
network = boost.BoostTrainOneStepCell(network, optimizer, loss_scale).set_train()
|
||||
else:
|
||||
network = nn.TrainOneStepCell(network, optimizer, loss_scale).set_train()
|
||||
return network
|
||||
|
|
|
@ -32,7 +32,7 @@ from ..parallel._ps_context import _is_role_pserver, _is_role_sched
|
|||
from ..nn.metrics import Loss
|
||||
from .. import nn
|
||||
from ..nn.wrap.cell_wrapper import _VirtualDatasetCell
|
||||
from ..nn.acc import acc
|
||||
from ..boost import AutoBoost
|
||||
from ..context import ParallelMode
|
||||
from ..parallel._cost_model_context import _set_multi_subgraphs
|
||||
from .dataset_helper import DatasetHelper, connect_network_with_dataset
|
||||
|
@ -89,13 +89,13 @@ class Model:
|
|||
|
||||
O2 is recommended on GPU, O3 is recommended on Ascend. The more detailed explanation of `amp_level` setting
|
||||
can be found at `mindspore.amp.build_train_network` .
|
||||
acc_level (str): Option for argument `level` in `mindspore.acc` , level for acc mode
|
||||
boost_level (str): Option for argument `level` in `mindspore.boost` , level for boost mode
|
||||
training. Supports ["O0", "O1", "O2"]. Default: "O0".
|
||||
|
||||
- O0: Do not change.
|
||||
- O1: Enable the acc mode, the performance is improved by about 20%, and
|
||||
- O1: Enable the boost mode, the performance is improved by about 20%, and
|
||||
the accuracy is the same as the original accuracy.
|
||||
- O2: Enable the acc mode, the performance is improved by about 30%, and
|
||||
- O2: Enable the boost mode, the performance is improved by about 30%, and
|
||||
the accuracy is reduced by less than 3%.
|
||||
Examples:
|
||||
>>> from mindspore import Model, nn
|
||||
|
@ -132,7 +132,7 @@ class Model:
|
|||
"""
|
||||
|
||||
def __init__(self, network, loss_fn=None, optimizer=None, metrics=None, eval_network=None,
|
||||
eval_indexes=None, amp_level="O0", acc_level="O0", **kwargs):
|
||||
eval_indexes=None, amp_level="O0", boost_level="O0", **kwargs):
|
||||
self._network = network
|
||||
self._loss_fn = loss_fn
|
||||
self._optimizer = optimizer
|
||||
|
@ -141,7 +141,7 @@ class Model:
|
|||
self._keep_bn_fp32 = True
|
||||
self._check_kwargs(kwargs)
|
||||
self._amp_level = amp_level
|
||||
self._acc_level = acc_level
|
||||
self._boost_level = boost_level
|
||||
self._eval_network = eval_network
|
||||
self._process_amp_args(kwargs)
|
||||
self._parallel_mode = _get_parallel_mode()
|
||||
|
@ -152,7 +152,7 @@ class Model:
|
|||
|
||||
self._check_amp_level_arg(optimizer, amp_level)
|
||||
self._check_for_graph_cell(kwargs)
|
||||
self._build_acc_network(kwargs)
|
||||
self._build_boost_network(kwargs)
|
||||
self._train_network = self._build_train_network()
|
||||
self._build_eval_network(metrics, self._eval_network, eval_indexes)
|
||||
self._build_predict_network()
|
||||
|
@ -194,16 +194,16 @@ class Model:
|
|||
if hasattr(dataset, '__model_hash__') and dataset.__model_hash__ != hash(self):
|
||||
raise RuntimeError('The Dataset cannot be bound to different models, please create a new dataset.')
|
||||
|
||||
def _build_acc_network(self, kwargs):
|
||||
"""Build the acc network."""
|
||||
processor = acc.AutoAcc(self._acc_level, kwargs)
|
||||
def _build_boost_network(self, kwargs):
|
||||
"""Build the boost network."""
|
||||
processor = AutoBoost(self._boost_level, kwargs)
|
||||
if processor.level not in ["O1", "O2"]:
|
||||
return
|
||||
if self._optimizer is None:
|
||||
logger.warning("In acc mode, the optimizer must be defined.")
|
||||
logger.warning("In boost mode, the optimizer must be defined.")
|
||||
return
|
||||
if self._eval_network is None and self._metrics is None:
|
||||
logger.warning("In acc mode, the eval_network and metrics cannot be undefined at the same time.")
|
||||
logger.warning("In boost mode, the eval_network and metrics cannot be undefined at the same time.")
|
||||
return
|
||||
|
||||
self._network, self._optimizer = processor.network_auto_process_train(self._network, self._optimizer)
|
||||
|
@ -222,7 +222,7 @@ class Model:
|
|||
self._optimizer,
|
||||
self._loss_fn,
|
||||
level=self._amp_level,
|
||||
acc_level=self._acc_level,
|
||||
boost_level=self._boost_level,
|
||||
loss_scale_manager=self._loss_scale_manager,
|
||||
keep_batchnorm_fp32=self._keep_bn_fp32)
|
||||
else:
|
||||
|
@ -230,7 +230,7 @@ class Model:
|
|||
self._optimizer,
|
||||
self._loss_fn,
|
||||
level=self._amp_level,
|
||||
acc_level=self._acc_level,
|
||||
boost_level=self._boost_level,
|
||||
keep_batchnorm_fp32=self._keep_bn_fp32)
|
||||
elif self._loss_fn:
|
||||
if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
|
||||
|
|
|
@ -210,6 +210,7 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
|
|||
│ ├──local_adapter.py # Get local ID
|
||||
│ └──moxing_adapter.py # Parameter processing
|
||||
├── default_config.yaml # Training parameter profile(ascend)
|
||||
├── default_config_boost.yaml # Training parameter profile(ascend boost)
|
||||
├── default_config_cpu.yaml # Training parameter profile(cpu)
|
||||
├── default_config_gpu.yaml # Training parameter profile(gpu)
|
||||
├── train.py # training script
|
||||
|
|
|
@ -211,7 +211,7 @@ MobileNetV2总体网络架构如下:
|
|||
│ ├──local_adapter.py # 获取本地id
|
||||
│ └──moxing_adapter.py # 云上数据准备
|
||||
├── default_config.yaml # 训练配置参数(ascend)
|
||||
├── default_config_acc.yaml # 训练配置参数(ascend acc模式)
|
||||
├── default_config_boost.yaml # 训练配置参数(ascend boost模式)
|
||||
├── default_config_cpu.yaml # 训练配置参数(cpu)
|
||||
├── default_config_gpu.yaml # 训练配置参数(gpu)
|
||||
├── train.py # 训练脚本
|
||||
|
|
|
@ -18,7 +18,7 @@ num_classes: 1000
|
|||
image_height: 224
|
||||
image_width: 224
|
||||
num_workers: 32
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
batch_size: 256
|
||||
epoch_size: 200
|
||||
warmup_epochs: 4
|
||||
|
|
|
@ -18,7 +18,7 @@ num_classes: 1000
|
|||
image_height: 224
|
||||
image_width: 224
|
||||
num_workers: 32
|
||||
acc_mode: "O1"
|
||||
boost_mode: "O1"
|
||||
batch_size: 256
|
||||
epoch_size: 200
|
||||
warmup_epochs: 4
|
|
@ -18,7 +18,7 @@ num_classes: 26
|
|||
image_height: 224
|
||||
image_width: 224
|
||||
num_workers: 8
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
batch_size: 150
|
||||
epoch_size: 15
|
||||
warmup_epochs: 0
|
||||
|
|
|
@ -18,7 +18,7 @@ num_classes: 1000
|
|||
image_height: 224
|
||||
image_width: 224
|
||||
num_workers: 8
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
batch_size: 150
|
||||
epoch_size: 200
|
||||
warmup_epochs: 0
|
||||
|
|
|
@ -172,7 +172,7 @@ def train_mobilenetv2():
|
|||
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale,
|
||||
metrics=metrics, eval_network=dist_eval_network,
|
||||
amp_level="O2", keep_batchnorm_fp32=False,
|
||||
acc_level=config.acc_mode)
|
||||
boost_level=config.boost_mode)
|
||||
|
||||
else:
|
||||
opt = Momentum(net.trainable_params(), lr, config.momentum, config.weight_decay)
|
||||
|
|
|
@ -209,7 +209,7 @@ If you want to run in modelarts, please check the official documentation of [mod
|
|||
├── resnet18_imagenet2012_config_gpu.yaml
|
||||
├── resnet34_imagenet2012_config.yaml
|
||||
├── resnet50_cifar10_config.yaml
|
||||
├── resnet50_imagenet2012_Acc_config.yaml # High performance version: The performance is improved by more than 10% and the precision decrease less than 1%
|
||||
├── resnet50_imagenet2012_Boost_config.yaml # High performance version: The performance is improved by more than 10% and the precision decreases by less than 1%
|
||||
├── resnet50_imagenet2012_Ascend_Thor_config.yaml
|
||||
├── resnet50_imagenet2012_config.yaml
|
||||
├── resnet50_imagenet2012_GPU_Thor_config.yaml
|
||||
|
|
|
@ -195,7 +195,7 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
|
|||
├── resnet18_imagenet2012_config_gpu.yaml
|
||||
├── resnet34_imagenet2012_config.yaml
|
||||
├── resnet50_cifar10_config.yaml
|
||||
├── resnet50_imagenet2012_Acc_config.yaml # 高性能版本:性能提高超过10%而精度下降少于1%
|
||||
├── resnet50_imagenet2012_Boost_config.yaml # 高性能版本:性能提高超过10%而精度下降少于1%
|
||||
├── resnet50_imagenet2012_Ascend_Thor_config.yaml
|
||||
├── resnet50_imagenet2012_config.yaml
|
||||
├── resnet50_imagenet2012_GPU_Thor_config.yaml
|
||||
|
|
|
@ -50,7 +50,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
all_reduce_fusion_config:
|
||||
|
|
|
@ -50,7 +50,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
train_image_size: 224
|
||||
|
|
|
@ -50,7 +50,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
train_image_size: 224
|
||||
|
|
|
@ -52,7 +52,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
train_image_size: 224
|
||||
|
|
|
@ -52,7 +52,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
train_image_size: 224
|
||||
|
|
|
@ -52,7 +52,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
train_image_size: 224
|
||||
|
|
|
@ -50,7 +50,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
all_reduce_fusion_config:
|
||||
|
|
|
@ -51,7 +51,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "HeUniform"
|
||||
dense_init: "HeUniform"
|
||||
all_reduce_fusion_config:
|
||||
|
|
|
@ -52,7 +52,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O1"
|
||||
boost_mode: "O1"
|
||||
conv_init: "TruncatedNormal"
|
||||
dense_init: "RandomNormal"
|
||||
all_reduce_fusion_config:
|
|
@ -51,7 +51,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "HeUniform"
|
||||
dense_init: "HeUniform"
|
||||
all_reduce_fusion_config:
|
||||
|
|
|
@ -52,7 +52,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
all_reduce_fusion_config:
|
||||
|
|
|
@ -25,7 +25,7 @@ eval: False
|
|||
save_ckpt: False
|
||||
mode_name: "GRAPH"
|
||||
dtype: "fp16"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
train_image_size: 224
|
||||
|
|
|
@ -53,7 +53,7 @@ eval_interval: 1
|
|||
enable_cache: False
|
||||
cache_session_id: ""
|
||||
mode_name: "GRAPH"
|
||||
acc_mode: "O0"
|
||||
boost_mode: "O0"
|
||||
conv_init: "XavierUniform"
|
||||
dense_init: "TruncatedNormal"
|
||||
all_reduce_fusion_config:
|
||||
|
|
|
@ -110,7 +110,7 @@ def set_parameter():
|
|||
gradients_mean=True)
|
||||
set_algo_parameters(elementwise_op_strategy_follow=True)
|
||||
if config.net_name == "resnet50" or config.net_name == "se-resnet50":
|
||||
if config.acc_mode not in ["O1", "O2"]:
|
||||
if config.boost_mode not in ["O1", "O2"]:
|
||||
context.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)
|
||||
elif config.net_name in ["resnet101", "resnet152"]:
|
||||
context.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)
|
||||
|
@ -258,7 +258,7 @@ def train_net():
|
|||
model = Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, eval_network=dist_eval_network)
|
||||
else:
|
||||
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics,
|
||||
amp_level="O2", acc_level=config.acc_mode, keep_batchnorm_fp32=False,
|
||||
amp_level="O2", boost_level=config.boost_mode, keep_batchnorm_fp32=False,
|
||||
eval_network=dist_eval_network)
|
||||
|
||||
if config.optimizer == "Thor" and config.dataset == "imagenet2012":
|
||||
|
|
|
@ -35,7 +35,7 @@ schema_dir: ''
|
|||
# ==============================================================================
|
||||
# pretrain related
|
||||
batch_size: 32
|
||||
# Available: [base, nezha, large, large_acc]
|
||||
# Available: [base, nezha, large, large_boost]
|
||||
bert_network: 'base'
|
||||
loss_scale_value: 65536
|
||||
scale_factor: 2
|
||||
|
@ -138,8 +138,8 @@ large_net_cfg:
|
|||
dtype: mstype.float32
|
||||
compute_type: mstype.float16
|
||||
# Accelerated large network, which is currently supported only on Ascend.
|
||||
large_acc_batch_size: 24
|
||||
large_acc_net_cfg:
|
||||
large_boost_batch_size: 24
|
||||
large_boost_net_cfg:
|
||||
seq_length: 512
|
||||
vocab_size: 30522
|
||||
hidden_size: 1024
|
||||
|
|
|
@ -35,8 +35,8 @@ schema_dir: ''
|
|||
# ==============================================================================
|
||||
# pretrain related
|
||||
batch_size: 20
|
||||
# Available: [base, nezha, large, large_acc]
|
||||
bert_network: 'large_acc'
|
||||
# Available: [base, nezha, large, large_boost]
|
||||
bert_network: 'large_boost'
|
||||
loss_scale_value: 65536
|
||||
scale_factor: 2
|
||||
scale_window: 1000
|
||||
|
@ -138,8 +138,8 @@ large_net_cfg:
|
|||
dtype: mstype.float32
|
||||
compute_type: mstype.float16
|
||||
# Accelerated large network, which is currently supported only on Ascend.
|
||||
large_acc_batch_size: 20
|
||||
large_acc_net_cfg:
|
||||
large_boost_batch_size: 20
|
||||
large_boost_net_cfg:
|
||||
seq_length: 512
|
||||
vocab_size: 30522
|
||||
hidden_size: 1024
|
||||
|
|
|
@ -141,8 +141,8 @@ def extra_operations(cfg):
|
|||
cfg.nezha_net_cfg.compute_type = parse_dtype(cfg.nezha_net_cfg.compute_type)
|
||||
cfg.large_net_cfg.dtype = parse_dtype(cfg.large_net_cfg.dtype)
|
||||
cfg.large_net_cfg.compute_type = parse_dtype(cfg.large_net_cfg.compute_type)
|
||||
cfg.large_acc_net_cfg.dtype = parse_dtype(cfg.large_acc_net_cfg.dtype)
|
||||
cfg.large_acc_net_cfg.compute_type = parse_dtype(cfg.large_acc_net_cfg.compute_type)
|
||||
cfg.large_boost_net_cfg.dtype = parse_dtype(cfg.large_boost_net_cfg.dtype)
|
||||
cfg.large_boost_net_cfg.compute_type = parse_dtype(cfg.large_boost_net_cfg.compute_type)
|
||||
if cfg.bert_network == 'base':
|
||||
cfg.batch_size = cfg.base_batch_size
|
||||
_bert_net_cfg = cfg.base_net_cfg
|
||||
|
@ -152,9 +152,9 @@ def extra_operations(cfg):
|
|||
elif cfg.bert_network == 'large':
|
||||
cfg.batch_size = cfg.large_batch_size
|
||||
_bert_net_cfg = cfg.large_net_cfg
|
||||
elif cfg.bert_network == 'large_acc':
|
||||
cfg.batch_size = cfg.large_acc_batch_size
|
||||
_bert_net_cfg = cfg.large_acc_net_cfg
|
||||
elif cfg.bert_network == 'large_boost':
|
||||
cfg.batch_size = cfg.large_boost_batch_size
|
||||
_bert_net_cfg = cfg.large_boost_net_cfg
|
||||
else:
|
||||
pass
|
||||
cfg.bert_net_cfg = BertConfig(**_bert_net_cfg.__dict__)
|
||||
|
|
|
@ -40,12 +40,12 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
|
|||
rank_id = int(os.getenv("RANK_ID"))
|
||||
if do_train:
|
||||
if device_num == 1:
|
||||
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
|
||||
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=16, shuffle=True)
|
||||
else:
|
||||
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
|
||||
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
|
||||
num_shards=device_num, shard_id=rank_id)
|
||||
else:
|
||||
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=False,
|
||||
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=False,
|
||||
num_shards=device_num, shard_id=rank_id)
|
||||
|
||||
image_size = 224
|
||||
|
@ -73,7 +73,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
|
|||
type_cast_op = C2.TypeCast(mstype.int32)
|
||||
|
||||
data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=24)
|
||||
data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=24)
|
||||
data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
|
||||
|
||||
# apply batch operations
|
||||
data_set = data_set.batch(batch_size, drop_remainder=True)
|
||||
|
|
Loading…
Reference in New Issue