[Boost]Add MindBoost.

This commit is contained in:
linqingke 2021-09-01 16:35:40 +08:00
parent 166ad9e3a7
commit 82b88ade65
40 changed files with 186 additions and 132 deletions

View File

@ -277,6 +277,7 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
${CMAKE_SOURCE_DIR}/mindspore/numpy
${CMAKE_SOURCE_DIR}/mindspore/train
${CMAKE_SOURCE_DIR}/mindspore/boost
${CMAKE_SOURCE_DIR}/mindspore/common
${CMAKE_SOURCE_DIR}/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/communication

View File

@ -175,6 +175,7 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
${CMAKE_SOURCE_DIR}/mindspore/numpy
${CMAKE_SOURCE_DIR}/mindspore/train
${CMAKE_SOURCE_DIR}/mindspore/boost
${CMAKE_SOURCE_DIR}/mindspore/common
${CMAKE_SOURCE_DIR}/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/communication

View File

@ -13,22 +13,22 @@
# limitations under the License.
# ============================================================================
"""
Accelerating.
MindBoost(Beta Feature)
Provide auto accelerating for network, such as Less BN, Gradient Freeze.
"""
from .acc import *
from .boost import *
from .base import *
from .acc_cell_wrapper import *
from .boost_cell_wrapper import *
from .less_batch_normalization import *
from .grad_freeze import *
from .grad_accumulation import *
from .adasum import *
__all__ = ['AutoAcc',
__all__ = ['AutoBoost',
'OptimizerProcess', 'ParameterProcess',
'AccTrainOneStepCell', 'AccTrainOneStepWithLossScaleCell',
'BoostTrainOneStepCell', 'BoostTrainOneStepWithLossScaleCell',
'LessBN',
'GradientFreeze', 'FreezeOpt', 'freeze_cell',
'GradientAccumulation',

View File

@ -26,10 +26,34 @@ __all__ = ["OptimizerProcess", "ParameterProcess"]
class OptimizerProcess:
"""
Process optimizer for ACC.
Process optimizer for Boost. Currently, this class supports adding GC(grad centralization) tags
and creating new optimizers.
Args:
opt (Cell): Optimizer used.
Examples:
>>> from mindspore import Tensor, Parameter, nn
>>> from mindspore.ops import operations as P
>>> from mindspore.boost import OptimizerProcess
>>>
>>> class Net(nn.Cell):
... def __init__(self, in_features, out_features):
... super(Net, self).__init__()
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight')
... self.matmul = ops.MatMul()
...
... def construct(self, x):
... output = self.matmul(x, self.weight)
... return output
...
>>> size, in_features, out_features = 16, 16, 10
>>> network = Net(in_features, out_features)
>>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> optimizer_process = OptimizerProcess(optimizer)
>>> optimizer_process.add_grad_centralization(network)
>>> optimizer = optimizer_process.generate_new_optimizer()
"""
def __init__(self, opt):
if isinstance(opt, LARS):
@ -113,7 +137,34 @@ class OptimizerProcess:
class ParameterProcess:
"""
Process parameter for ACC.
Process parameter for Boost. Currently, this class supports creating group parameters
and automatically setting gradient segmentation point.
Examples:
>>> from mindspore import Tensor, Parameter, nn
>>> from mindspore.ops import operations as P
>>> from mindspore.boost import OptimizerProcess
>>>
>>> class Net(nn.Cell):
... def __init__(self, in_features, out_features):
... super(Net, self).__init__()
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight')
... self.weight2 = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight2')
... self.matmul = ops.MatMul()
... self.matmul2 = ops.MatMul()
...
... def construct(self, x):
... output = self.matmul(x, self.weight)
... output2 = self.matmul2(x, self.weight2)
... return output + output2
...
>>> size, in_features, out_features = 16, 16, 10
>>> network = Net(in_features, out_features)
>>> new_parameter = net.trainable_params()[:1]
>>> parameter_process = ParameterProcess()
>>> group_params = parameter_process.generate_group_params(new_parameter, net.trainable_params())
"""
def __init__(self):
self._parameter_indices = 1

View File

@ -12,16 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""acc"""
"""boost"""
from .less_batch_normalization import LessBN
from .grad_freeze import GradientFreeze
from .base import OptimizerProcess, ParameterProcess
__all__ = ["AutoAcc"]
__all__ = ["AutoBoost"]
_acc_config_level = {
_boost_config_level = {
"O0": {
"less_bn": False,
"grad_freeze": False,
@ -36,19 +36,19 @@ _acc_config_level = {
"adasum": True}}
class AutoAcc:
class AutoBoost:
"""
Provide auto accelerating for network.
Args:
level (Str): acc config level.
level (Str): boost config level.
"""
def __init__(self, level, kwargs):
if level not in _acc_config_level.keys():
if level not in _boost_config_level.keys():
level = 'O0'
self.level = level
acc_config = _acc_config_level[level]
self._acc_config = acc_config
boost_config = _boost_config_level[level]
self._boost_config = boost_config
self._fn_flag = True
self._gc_flag = True
self._param_groups = 10
@ -62,13 +62,13 @@ class AutoAcc:
def _get_configuration(self, kwargs):
"""Get configuration."""
for key, val in kwargs.items():
if key not in self._acc_config_func_map.keys():
if key not in self._boost_config_func_map.keys():
continue
self._acc_config_func_map[key](self, val)
self._boost_config_func_map[key](self, val)
def network_auto_process_train(self, network, optimizer):
"""Network train."""
if self._acc_config["less_bn"]:
if self._boost_config["less_bn"]:
network = LessBN(network, fn_flag=self._fn_flag)
optimizer_process = OptimizerProcess(optimizer)
group_params = self._param_processer.assign_parameter_group(network.trainable_params(),
@ -79,18 +79,18 @@ class AutoAcc:
optimizer_process.add_grad_centralization(network)
optimizer = optimizer_process.generate_new_optimizer()
if self._acc_config["grad_freeze"]:
if self._boost_config["grad_freeze"]:
freeze_processer = GradientFreeze(self._param_groups, self._freeze_type,
self._freeze_p, self._total_steps)
network, optimizer = freeze_processer.freeze_generate(network, optimizer)
if self._acc_config["adasum"]:
if self._boost_config["adasum"]:
setattr(optimizer, "adasum", True)
return network, optimizer
def network_auto_process_eval(self, network):
"""Network eval."""
if self._acc_config["less_bn"]:
if self._boost_config["less_bn"]:
network = LessBN(network)
return network
@ -120,7 +120,7 @@ class AutoAcc:
gradient_groups = list(gradient_groups)
self._gradient_groups = gradient_groups
_acc_config_func_map = {
_boost_config_func_map = {
"fn_flag": set_fn_flag,
"gc_flag": set_gc_flag,
"param_groups": set_param_groups,

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Acc Mode Cell Wrapper."""
"""Boost Mode Cell Wrapper."""
from mindspore.nn.wrap import TrainOneStepCell
import mindspore.context as context
from mindspore.context import ParallelMode, get_auto_parallel_context
@ -31,7 +31,7 @@ from .adasum import AdaSum
from .grad_accumulation import gradient_accumulation_op, gradient_clear_op
__all__ = ["AccTrainOneStepCell", "AccTrainOneStepWithLossScaleCell"]
__all__ = ["BoostTrainOneStepCell", "BoostTrainOneStepWithLossScaleCell"]
_get_delta_weight = C.MultitypeFuncGraph("_get_delta_weight")
@ -51,9 +51,9 @@ def _save_weight_process(new_parameter, old_parameter):
return P.Assign()(new_parameter, old_parameter)
class AccTrainOneStepCell(TrainOneStepCell):
class BoostTrainOneStepCell(TrainOneStepCell):
r"""
Acc Network training package class.
Boost Network training package class.
Wraps the network with an optimizer. The resulting Cell is trained with input '\*inputs'.
The backward graph will be created in the construct function to update the parameter. Different
@ -82,29 +82,29 @@ class AccTrainOneStepCell(TrainOneStepCell):
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> #1) Using the WithLossCell existing provide
>>> loss_net = nn.WithLossCell(net, loss_fn)
>>> train_net = nn.acc.AccTrainOneStepCell(loss_net, optim)
>>> train_net = boost.BoostTrainOneStepCell(loss_net, optim)
>>>
>>> #2) Using user-defined WithLossCell
>>> class MyWithLossCell(Cell):
mindspore. def __init__(self, backbone, loss_fn):
mindspore. super(MyWithLossCell, self).__init__(auto_prefix=False)
mindspore. self._backbone = backbone
mindspore. self._loss_fn = loss_fn
mindspore.
mindspore. def construct(self, x, y, label):
mindspore. out = self._backbone(x, y)
mindspore. return self._loss_fn(out, label)
mindspore.
mindspore. @property
mindspore. def backbone_network(self):
mindspore. return self._backbone
mindspore.
... def __init__(self, backbone, loss_fn):
... super(MyWithLossCell, self).__init__(auto_prefix=False)
... self._backbone = backbone
... self._loss_fn = loss_fn
...
... def construct(self, x, y, label):
... out = self._backbone(x, y)
... return self._loss_fn(out, label)
...
... @property
... def backbone_network(self):
... return self._backbone
...
>>> loss_net = MyWithLossCell(net, loss_fn)
>>> train_net = nn.acc.AccTrainOneStepCellTrainOneStepCell(loss_net, optim)
>>> train_net = boost.BoostTrainOneStepCellTrainOneStepCell(loss_net, optim)
"""
def __init__(self, network, optimizer, sens=1.0):
super(AccTrainOneStepCell, self).__init__(network, optimizer, sens)
super(BoostTrainOneStepCell, self).__init__(network, optimizer, sens)
self.hyper_map = C.HyperMap()
self.freeze = isinstance(optimizer, FreezeOpt)
if not self.freeze:
@ -240,13 +240,13 @@ class AccTrainOneStepCell(TrainOneStepCell):
return is_enable
class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
class BoostTrainOneStepWithLossScaleCell(BoostTrainOneStepCell):
r"""
Acc Network training with loss scaling.
Boost Network training with loss scaling.
This is a training step with loss scaling. It takes a network, an optimizer and possibly a scale update
Cell as args. The loss scale value can be updated in both host side or device side. The
AccTrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
BoostTrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data.
The Tensor type of `scale_sense` is acting as loss scaling value. If you want to update it on host side,
the value must be provided. If the Tensor type of `scale_sense` is not given, the loss scale update logic
must be provied by Cell type of `scale_sense`.
@ -282,16 +282,16 @@ class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
>>> from mindspore.common import dtype as mstype
>>>
>>> class Net(nn.Cell):
mindspore. def __init__(self, in_features, out_features):
mindspore. super(Net, self).__init__()
mindspore. self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
mindspore. name='weight')
mindspore. self.matmul = P.MatMul()
mindspore.
mindspore. def construct(self, x):
mindspore. output = self.matmul(x, self.weight)
mindspore. return output
mindspore.
... def __init__(self, in_features, out_features):
... super(Net, self).__init__()
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight')
... self.matmul = ops.MatMul()
...
... def construct(self, x):
... output = self.matmul(x, self.weight)
... return output
...
>>> size, in_features, out_features = 16, 16, 10
>>> #1) when the type of scale_sense is Cell:
>>> net = Net(in_features, out_features)
@ -299,7 +299,7 @@ class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
>>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> net_with_loss = WithLossCell(net, loss)
>>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)
>>> train_network = nn.acc.AccTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
>>> train_network = boost.BoostTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
>>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32)
>>> labels = Tensor(np.ones([out_features,]), mindspore.float32)
>>> output = train_network(input, labels)
@ -312,11 +312,11 @@ class AccTrainOneStepWithLossScaleCell(AccTrainOneStepCell):
>>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32))
>>> label = Tensor(np.zeros([size, out_features]).astype(np.float32))
>>> scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
>>> train_network = nn.acc.AccTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=scaling_sens)
>>> train_network = boost.BoostTrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=scaling_sens)
>>> output = train_network(inputs, label)
"""
def __init__(self, network, optimizer, scale_sense):
super(AccTrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None)
super(BoostTrainOneStepWithLossScaleCell, self).__init__(network, optimizer, sens=None)
self.base = Tensor(1, mstype.float32)
self.reduce_sum = P.ReduceSum(keep_dims=False)
self.less_equal = P.LessEqual()

View File

@ -91,13 +91,13 @@ class LessBN(Cell):
network (Cell): Network to be modified.
Examples:
>>> network = acc.LessBN(network)
>>> network = boost.LessBN(network)
"""
def __init__(self, network, fn_flag=False):
super(LessBN, self).__init__()
self.network = network
self.network.set_acc("less_bn")
self.network.set_boost("less_bn")
self.network.update_cell_prefix()
if fn_flag:
self._convert_to_less_bn_net(self.network)

View File

@ -1145,29 +1145,29 @@ class Cell(Cell_):
self._add_init_args(**flags)
return self
def set_acc(self, acc_type):
def set_boost(self, boost_type):
"""
In order to improve the network performance, configure the network auto enable to
accelerate the algorithm in the algorithm library.
If `acc_type is not in the algorithm library`, Please view the algorithm in the algorithm library
If `boost_type is not in the algorithm library`, Please view the algorithm in the algorithm library
through `algorithm library`.
Note:
Some acceleration algorithms may affect the accuracy of the network, please choose carefully.
Args:
acc_type (str): accelerate algorithm.
boost_type (str): accelerate algorithm.
Returns:
Cell, the cell itself.
Raises:
ValueError: If acc_type is not in the algorithm library.
ValueError: If boost_type is not in the algorithm library.
"""
if acc_type not in ("less_bn",):
raise ValueError("The acc_type is not in the algorithm library.")
flags = {"less_bn": acc_type == "less_bn"}
if boost_type not in ("less_bn",):
raise ValueError("The boost_type is not in the algorithm library.")
flags = {"less_bn": boost_type == "less_bn"}
self.add_flags_recursive(**flags)
return self

View File

@ -17,13 +17,13 @@ from .. import nn
from .._checkparam import Validator as validator
from .._checkparam import Rel
from ..common import dtype as mstype
from ..nn import acc
from ..nn.wrap.cell_wrapper import _VirtualDatasetCell, _TrainPipelineAccuStepCell
from ..nn.wrap.loss_scale import _TrainPipelineWithLossScaleCell
from ..ops import functional as F
from ..parallel._utils import _get_parallel_mode, _get_pipeline_stages
from .loss_scale_manager import DynamicLossScaleManager, LossScaleManager
from ..context import ParallelMode
from .. import boost
from .. import context
@ -111,7 +111,7 @@ def _add_loss_network(network, loss_fn, cast_model_type):
return network
def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level='O0', **kwargs):
def build_train_network(network, optimizer, loss_fn=None, level='O0', boost_level='O0', **kwargs):
"""
Build the mixed precision training cell automatically.
@ -147,9 +147,9 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
(with property `drop_overflow_update=False` ), or a `ValueError` exception will be raised.
"""
validator.check_value_type('network', network, nn.Cell)
validator.check_value_type('optimizer', optimizer, (nn.Optimizer, acc.FreezeOpt))
validator.check_value_type('optimizer', optimizer, (nn.Optimizer, boost.FreezeOpt))
validator.check('level', level, "", ['O0', 'O2', 'O3', "auto"], Rel.IN)
validator.check('acc_level', acc_level, "", ['O0', 'O1', 'O2'], Rel.IN)
validator.check('boost_level', boost_level, "", ['O0', 'O1', 'O2'], Rel.IN)
if level == "auto":
device_target = context.get_context('device_target')
@ -175,9 +175,9 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
if _get_parallel_mode() in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
network = _VirtualDatasetCell(network)
enable_acc = False
if acc_level in ["O1", "O2"]:
enable_acc = True
enable_boost = False
if boost_level in ["O1", "O2"]:
enable_boost = True
loss_scale = 1.0
if config["loss_scale_manager"] is not None:
@ -193,8 +193,8 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
if _get_pipeline_stages() > 1:
network = _TrainPipelineWithLossScaleCell(network, optimizer,
scale_sense=update_cell).set_train()
elif enable_acc:
network = acc.AccTrainOneStepWithLossScaleCell(network, optimizer,
elif enable_boost:
network = boost.BoostTrainOneStepWithLossScaleCell(network, optimizer,
scale_sense=update_cell).set_train()
else:
network = nn.TrainOneStepWithLossScaleCell(network, optimizer,
@ -202,8 +202,8 @@ def build_train_network(network, optimizer, loss_fn=None, level='O0', acc_level=
return network
if _get_pipeline_stages() > 1:
network = _TrainPipelineAccuStepCell(network, optimizer).set_train()
elif enable_acc:
network = acc.AccTrainOneStepCell(network, optimizer, loss_scale).set_train()
elif enable_boost:
network = boost.BoostTrainOneStepCell(network, optimizer, loss_scale).set_train()
else:
network = nn.TrainOneStepCell(network, optimizer, loss_scale).set_train()
return network

View File

@ -32,7 +32,7 @@ from ..parallel._ps_context import _is_role_pserver, _is_role_sched
from ..nn.metrics import Loss
from .. import nn
from ..nn.wrap.cell_wrapper import _VirtualDatasetCell
from ..nn.acc import acc
from ..boost import AutoBoost
from ..context import ParallelMode
from ..parallel._cost_model_context import _set_multi_subgraphs
from .dataset_helper import DatasetHelper, connect_network_with_dataset
@ -89,13 +89,13 @@ class Model:
O2 is recommended on GPU, O3 is recommended on Ascend.The more detailed explanation of `amp_level` setting
can be found at `mindspore.amp.build_train_network` .
acc_level (str): Option for argument `level` in `mindspore.acc` , level for acc mode
boost_level (str): Option for argument `level` in `mindspore.boost` , level for boost mode
training. Supports ["O0", "O1", "O2"]. Default: "O0".
- O0: Do not change.
- O1: Enable the acc mode, the performance is improved by about 20%, and
- O1: Enable the boost mode, the performance is improved by about 20%, and
the accuracy is the same as the original accuracy.
- O2: Enable the acc mode, the performance is improved by about 30%, and
- O2: Enable the boost mode, the performance is improved by about 30%, and
the accuracy is reduced by less than 3%.
Examples:
>>> from mindspore import Model, nn
@ -132,7 +132,7 @@ class Model:
"""
def __init__(self, network, loss_fn=None, optimizer=None, metrics=None, eval_network=None,
eval_indexes=None, amp_level="O0", acc_level="O0", **kwargs):
eval_indexes=None, amp_level="O0", boost_level="O0", **kwargs):
self._network = network
self._loss_fn = loss_fn
self._optimizer = optimizer
@ -141,7 +141,7 @@ class Model:
self._keep_bn_fp32 = True
self._check_kwargs(kwargs)
self._amp_level = amp_level
self._acc_level = acc_level
self._boost_level = boost_level
self._eval_network = eval_network
self._process_amp_args(kwargs)
self._parallel_mode = _get_parallel_mode()
@ -152,7 +152,7 @@ class Model:
self._check_amp_level_arg(optimizer, amp_level)
self._check_for_graph_cell(kwargs)
self._build_acc_network(kwargs)
self._build_boost_network(kwargs)
self._train_network = self._build_train_network()
self._build_eval_network(metrics, self._eval_network, eval_indexes)
self._build_predict_network()
@ -194,16 +194,16 @@ class Model:
if hasattr(dataset, '__model_hash__') and dataset.__model_hash__ != hash(self):
raise RuntimeError('The Dataset cannot be bound to different models, please create a new dataset.')
def _build_acc_network(self, kwargs):
"""Build the acc network."""
processor = acc.AutoAcc(self._acc_level, kwargs)
def _build_boost_network(self, kwargs):
"""Build the boost network."""
processor = AutoBoost(self._boost_level, kwargs)
if processor.level not in ["O1", "O2"]:
return
if self._optimizer is None:
logger.warning("In acc mode, the optimizer must be defined.")
logger.warning("In boost mode, the optimizer must be defined.")
return
if self._eval_network is None and self._metrics is None:
logger.warning("In acc mode, the eval_network and metrics cannot be undefined at the same time.")
logger.warning("In boost mode, the eval_network and metrics cannot be undefined at the same time.")
return
self._network, self._optimizer = processor.network_auto_process_train(self._network, self._optimizer)
@ -222,7 +222,7 @@ class Model:
self._optimizer,
self._loss_fn,
level=self._amp_level,
acc_level=self._acc_level,
boost_level=self._boost_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
@ -230,7 +230,7 @@ class Model:
self._optimizer,
self._loss_fn,
level=self._amp_level,
acc_level=self._acc_level,
boost_level=self._boost_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):

View File

@ -210,6 +210,7 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil
│ ├──local_adapter.py # Get local ID
│ └──moxing_adapter.py # Parameter processing
├── default_config.yaml # Training parameter profile(ascend)
├── default_config_boost.yaml # Training parameter profile(ascend boost)
├── default_config_cpu.yaml # Training parameter profile(cpu)
├── default_config_gpu.yaml # Training parameter profile(gpu)
├── train.py # training script

View File

@ -211,7 +211,7 @@ MobileNetV2总体网络架构如下
│ ├──local_adapter.py # 获取本地id
│ └──moxing_adapter.py # 云上数据准备
├── default_config.yaml # 训练配置参数(ascend)
├── default_config_acc.yaml # 训练配置参数(ascend acc模式)
├── default_config_boost.yaml # 训练配置参数(ascend boost模式)
├── default_config_cpu.yaml # 训练配置参数(cpu)
├── default_config_gpu.yaml # 训练配置参数(gpu)
├── train.py # 训练脚本

View File

@ -18,7 +18,7 @@ num_classes: 1000
image_height: 224
image_width: 224
num_workers: 32
acc_mode: "O0"
boost_mode: "O0"
batch_size: 256
epoch_size: 200
warmup_epochs: 4

View File

@ -18,7 +18,7 @@ num_classes: 1000
image_height: 224
image_width: 224
num_workers: 32
acc_mode: "O1"
boost_mode: "O1"
batch_size: 256
epoch_size: 200
warmup_epochs: 4

View File

@ -18,7 +18,7 @@ num_classes: 26
image_height: 224
image_width: 224
num_workers: 8
acc_mode: "O0"
boost_mode: "O0"
batch_size: 150
epoch_size: 15
warmup_epochs: 0

View File

@ -18,7 +18,7 @@ num_classes: 1000
image_height: 224
image_width: 224
num_workers: 8
acc_mode: "O0"
boost_mode: "O0"
batch_size: 150
epoch_size: 200
warmup_epochs: 0

View File

@ -172,7 +172,7 @@ def train_mobilenetv2():
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale,
metrics=metrics, eval_network=dist_eval_network,
amp_level="O2", keep_batchnorm_fp32=False,
acc_level=config.acc_mode)
boost_level=config.boost_mode)
else:
opt = Momentum(net.trainable_params(), lr, config.momentum, config.weight_decay)

View File

@ -209,7 +209,7 @@ If you want to run in modelarts, please check the official documentation of [mod
├── resnet18_imagenet2012_config_gpu.yaml
├── resnet34_imagenet2012_config.yaml
├── resnet50_cifar10_config.yaml
├── resnet50_imagenet2012_Acc_config.yaml # High performance version: The performance is improved by more than 10% and the precision decrease less than 1%
├── resnet50_imagenet2012_Boost_config.yaml # High performance version: The performance is improved by more than 10% and the precision decrease less than 1%
├── resnet50_imagenet2012_Ascend_Thor_config.yaml
├── resnet50_imagenet2012_config.yaml
├── resnet50_imagenet2012_GPU_Thor_config.yaml

View File

@ -195,7 +195,7 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]
├── resnet18_imagenet2012_config_gpu.yaml
├── resnet34_imagenet2012_config.yaml
├── resnet50_cifar10_config.yaml
├── resnet50_imagenet2012_Acc_config.yaml # 高性能版本性能提高超过10%而精度下降少于1%
├── resnet50_imagenet2012_Boost_config.yaml # 高性能版本性能提高超过10%而精度下降少于1%
├── resnet50_imagenet2012_Ascend_Thor_config.yaml
├── resnet50_imagenet2012_config.yaml
├── resnet50_imagenet2012_GPU_Thor_config.yaml

View File

@ -50,7 +50,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
all_reduce_fusion_config:

View File

@ -50,7 +50,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
train_image_size: 224

View File

@ -50,7 +50,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
train_image_size: 224

View File

@ -52,7 +52,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
train_image_size: 224

View File

@ -52,7 +52,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
train_image_size: 224

View File

@ -52,7 +52,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
train_image_size: 224

View File

@ -50,7 +50,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
all_reduce_fusion_config:

View File

@ -51,7 +51,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "HeUniform"
dense_init: "HeUniform"
all_reduce_fusion_config:

View File

@ -52,7 +52,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O1"
boost_mode: "O1"
conv_init: "TruncatedNormal"
dense_init: "RandomNormal"
all_reduce_fusion_config:

View File

@ -51,7 +51,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "HeUniform"
dense_init: "HeUniform"
all_reduce_fusion_config:

View File

@ -52,7 +52,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
all_reduce_fusion_config:

View File

@ -25,7 +25,7 @@ eval: False
save_ckpt: False
mode_name: "GRAPH"
dtype: "fp16"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
train_image_size: 224

View File

@ -53,7 +53,7 @@ eval_interval: 1
enable_cache: False
cache_session_id: ""
mode_name: "GRAPH"
acc_mode: "O0"
boost_mode: "O0"
conv_init: "XavierUniform"
dense_init: "TruncatedNormal"
all_reduce_fusion_config:

View File

@ -110,7 +110,7 @@ def set_parameter():
gradients_mean=True)
set_algo_parameters(elementwise_op_strategy_follow=True)
if config.net_name == "resnet50" or config.net_name == "se-resnet50":
if config.acc_mode not in ["O1", "O2"]:
if config.boost_mode not in ["O1", "O2"]:
context.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)
elif config.net_name in ["resnet101", "resnet152"]:
context.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)
@ -258,7 +258,7 @@ def train_net():
model = Model(net, loss_fn=loss, optimizer=opt, metrics=metrics, eval_network=dist_eval_network)
else:
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics,
amp_level="O2", acc_level=config.acc_mode, keep_batchnorm_fp32=False,
amp_level="O2", boost_level=config.boost_mode, keep_batchnorm_fp32=False,
eval_network=dist_eval_network)
if config.optimizer == "Thor" and config.dataset == "imagenet2012":

View File

@ -35,7 +35,7 @@ schema_dir: ''
# ==============================================================================
# pretrain related
batch_size: 32
# Available: [base, nezha, large, large_acc]
# Available: [base, nezha, large, large_boost]
bert_network: 'base'
loss_scale_value: 65536
scale_factor: 2
@ -138,8 +138,8 @@ large_net_cfg:
dtype: mstype.float32
compute_type: mstype.float16
# Accelerated large network which is only supported in Ascend yet.
large_acc_batch_size: 24
large_acc_net_cfg:
large_boost_batch_size: 24
large_boost_net_cfg:
seq_length: 512
vocab_size: 30522
hidden_size: 1024

View File

@ -35,8 +35,8 @@ schema_dir: ''
# ==============================================================================
# pretrain related
batch_size: 20
# Available: [base, nezha, large, large_acc]
bert_network: 'large_acc'
# Available: [base, nezha, large, large_boost]
bert_network: 'large_boost'
loss_scale_value: 65536
scale_factor: 2
scale_window: 1000
@ -138,8 +138,8 @@ large_net_cfg:
dtype: mstype.float32
compute_type: mstype.float16
# Accelerated large network which is only supported in Ascend yet.
large_acc_batch_size: 20
large_acc_net_cfg:
large_boost_batch_size: 20
large_boost_net_cfg:
seq_length: 512
vocab_size: 30522
hidden_size: 1024

View File

@ -141,8 +141,8 @@ def extra_operations(cfg):
cfg.nezha_net_cfg.compute_type = parse_dtype(cfg.nezha_net_cfg.compute_type)
cfg.large_net_cfg.dtype = parse_dtype(cfg.large_net_cfg.dtype)
cfg.large_net_cfg.compute_type = parse_dtype(cfg.large_net_cfg.compute_type)
cfg.large_acc_net_cfg.dtype = parse_dtype(cfg.large_acc_net_cfg.dtype)
cfg.large_acc_net_cfg.compute_type = parse_dtype(cfg.large_acc_net_cfg.compute_type)
cfg.large_boost_net_cfg.dtype = parse_dtype(cfg.large_boost_net_cfg.dtype)
cfg.large_boost_net_cfg.compute_type = parse_dtype(cfg.large_boost_net_cfg.compute_type)
if cfg.bert_network == 'base':
cfg.batch_size = cfg.base_batch_size
_bert_net_cfg = cfg.base_net_cfg
@ -152,9 +152,9 @@ def extra_operations(cfg):
elif cfg.bert_network == 'large':
cfg.batch_size = cfg.large_batch_size
_bert_net_cfg = cfg.large_net_cfg
elif cfg.bert_network == 'large_acc':
cfg.batch_size = cfg.large_acc_batch_size
_bert_net_cfg = cfg.large_acc_net_cfg
elif cfg.bert_network == 'large_boost':
cfg.batch_size = cfg.large_boost_batch_size
_bert_net_cfg = cfg.large_boost_net_cfg
else:
pass
cfg.bert_net_cfg = BertConfig(**_bert_net_cfg.__dict__)

View File

@ -40,12 +40,12 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
rank_id = int(os.getenv("RANK_ID"))
if do_train:
if device_num == 1:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=16, shuffle=True)
else:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
num_shards=device_num, shard_id=rank_id)
else:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=False,
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=False,
num_shards=device_num, shard_id=rank_id)
image_size = 224
@ -73,7 +73,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
type_cast_op = C2.TypeCast(mstype.int32)
data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=24)
data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=24)
data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
# apply batch operations
data_set = data_set.batch(batch_size, drop_remainder=True)