forked from mindspore-Ecosystem/mindspore
add distribute training for gpu benchmark
parent 51a57b9243 · commit a57fdc3b2b
@@ -277,7 +277,7 @@ sh run_standalone_train_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATA
 sh run_eval_gpu.sh [resnet50|resnet101] [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH]

 # gpu benchmark example
-sh run_gpu_resnet_benchmark.sh [IMAGENET_DATASET_PATH] [BATCH_SIZE](optional)
+sh run_gpu_resnet_benchmark.sh [IMAGENET_DATASET_PATH] [BATCH_SIZE](optional) [DEVICE_NUM](optional)
 ```

 #### Running parameter server mode training
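The new optional [DEVICE_NUM] argument turns the benchmark into a multi-GPU data-parallel run; the example added to the launcher script at the end of this diff is `sh run_gpu_resnet_benchmark.sh /path/imagenet/train 256 8`. Note that in the three-argument form [BATCH_SIZE] is effectively required, since the script forwards it as `$2`.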
@@ -345,16 +345,11 @@ epoch: 5 step: 5004, loss is 3.3501816

 ```
 # ========START RESNET50 GPU BENCHMARK========
-step time: 22549.130 ms, fps: 11 img/sec. epoch: 1 step: 1, loss is 6.940182
-step time: 182.485 ms, fps: 1402 img/sec. epoch: 1 step: 2, loss is 7.078993
-step time: 175.263 ms, fps: 1460 img/sec. epoch: 1 step: 3, loss is 7.559594
-step time: 174.775 ms, fps: 1464 img/sec. epoch: 1 step: 4, loss is 8.020937
-step time: 175.564 ms, fps: 1458 img/sec. epoch: 1 step: 5, loss is 8.140132
-step time: 175.438 ms, fps: 1459 img/sec. epoch: 1 step: 6, loss is 8.021118
-step time: 175.760 ms, fps: 1456 img/sec. epoch: 1 step: 7, loss is 7.910158
-step time: 176.033 ms, fps: 1454 img/sec. epoch: 1 step: 8, loss is 7.940162
-step time: 175.995 ms, fps: 1454 img/sec. epoch: 1 step: 9, loss is 7.740654
-step time: 175.313 ms, fps: 1460 img/sec. epoch: 1 step: 10, loss is 7.956182
+step time: 12416.098 ms, fps: 412 img/sec. epoch: 1 step: 20, loss is 6.940182
+step time: 3472.037 ms, fps: 1474 img/sec. epoch: 2 step: 20, loss is 7.078993
+step time: 3469.523 ms, fps: 1475 img/sec. epoch: 3 step: 20, loss is 7.559594
+step time: 3460.311 ms, fps: 1479 img/sec. epoch: 4 step: 20, loss is 6.920937
+step time: 3460.543 ms, fps: 1479 img/sec. epoch: 5 step: 20, loss is 6.814013
 ...
 ```

 ## [Evaluation Process](#contents)
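The sample log changes shape because timing is now reported once per data sink of `print_per_steps` steps (default 20) rather than once per step, so each `step time` line covers 20 steps and the per-line fps rises accordingly; the first line stays slow because it includes graph compilation. The arithmetic is spelled out below, after the training-script hunk that changes `MyTimeMonitor`.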
@@ -14,45 +14,54 @@
 # ============================================================================
 """train resnet."""
 import argparse
+import ast
 import time
 import numpy as np
 from mindspore import context
 from mindspore import Tensor
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.train.model import Model
+from mindspore.context import ParallelMode
 from mindspore.train.callback import Callback, LossMonitor
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
+from mindspore.communication.management import init, get_group_size
 from mindspore.common import set_seed
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
-import mindspore.common.dtype as mstype
 import mindspore.dataset.engine as de
 import mindspore.dataset.vision.c_transforms as C
-import mindspore.dataset.transforms.c_transforms as C2
 from src.resnet_gpu_benchmark import resnet50 as resnet

 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--batch_size', type=str, default="256", help='Batch_size: default 256.')
 parser.add_argument('--epoch_size', type=str, default="2", help='Epoch_size: default 2')
+parser.add_argument('--print_per_steps', type=str, default="20", help='Print loss and time per steps: default 20')
+parser.add_argument('--run_distribute', type=ast.literal_eval, default=False, help='Run distribute')
 parser.add_argument('--dataset_path', type=str, default=None, help='Imagenet dataset path')
 args_opt = parser.parse_args()

 set_seed(1)

 class MyTimeMonitor(Callback):
-    def __init__(self, batch_size):
+    def __init__(self, batch_size, sink_size):
         super(MyTimeMonitor, self).__init__()
         self.batch_size = batch_size
+        self.size = sink_size
     def step_begin(self, run_context):
         self.step_time = time.time()
     def step_end(self, run_context):
         step_mseconds = (time.time() - self.step_time) * 1000
-        fps = self.batch_size / step_mseconds * 1000
+        fps = self.batch_size / step_mseconds * 1000 * self.size
         print("step time: {:5.3f} ms, fps: {:d} img/sec.".format(step_mseconds, int(fps)), flush=True, end=" ")

+def pad(image):
+    zeros = np.zeros([224, 224, 1], dtype=np.uint8)
+    output = np.concatenate((image, zeros), axis=2)
+    return output

 def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="GPU"):
-    ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
+    ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=4, shuffle=True)

     image_size = 224
     mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
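Because training uses data sinking, `step_end` now fires once per sink iteration, so the measured interval spans `sink_size` steps and the throughput is scaled by `self.size`. A quick sanity check of the new formula against the first benchmark line in the README above (a sketch; 256 and 20 are the argparse defaults from this commit):

```python
batch_size, sink_size = 256, 20                        # argparse defaults in this commit
step_mseconds = 12416.098                              # first reported sink (includes compile time)
fps = batch_size / step_mseconds * 1000 * sink_size    # MyTimeMonitor.step_end formula
print(int(fps))                                        # -> 412, matching "fps: 412 img/sec"
```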
@@ -73,15 +82,12 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
         C.Normalize(mean=mean, std=std),
     ]

-    type_cast_op = C2.TypeCast(mstype.int32)
-    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
-    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
-    ds = ds.map(operations=C2.PadEnd(pad_shape=[224, 224, 4], pad_value=0), input_columns="image",
-                num_parallel_workers=8)
+    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=4)
+    ds = ds.map(operations=pad, input_columns="image", num_parallel_workers=4)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
     # apply dataset repeat operation
-    ds = ds.repeat(repeat_num)
+    if repeat_num > 1:
+        ds = ds.repeat(repeat_num)

     return ds
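The custom `pad` map reproduces what the removed `C2.PadEnd(pad_shape=[224, 224, 4], pad_value=0)` did: it stacks an all-zero fourth channel onto each 224×224 image, presumably so the channel count suits the benchmark network's GPU kernels (the commit itself doesn't say). A minimal sketch of the shape change, using a uint8 stand-in for a decoded image:

```python
import numpy as np

image = np.zeros([224, 224, 3], dtype=np.uint8)    # stand-in for one decoded RGB image
zeros = np.zeros([224, 224, 1], dtype=np.uint8)
padded = np.concatenate((image, zeros), axis=2)    # same operation as pad()
print(padded.shape)                                # (224, 224, 4)
```

The image maps now use 4 workers each instead of 8, the label `TypeCast` map is dropped (hence the removed `mstype` and `C2` imports), and `ds.repeat` is skipped in the common `repeat_num=1` case.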
@@ -101,16 +107,27 @@ def get_liner_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per
     return lr_each_step

 if __name__ == '__main__':
+    # set args
     dev = "GPU"
     epoch_size = int(args_opt.epoch_size)
     total_batch = int(args_opt.batch_size)
+    print_per_steps = int(args_opt.print_per_steps)

     # init context
     context.set_context(mode=context.GRAPH_MODE, device_target=dev, save_graphs=False)
+    if args_opt.run_distribute:
+        init()
+        context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          gradients_mean=True, all_reduce_fusion_config=[85, 160])

     # create dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, repeat_num=1,
                              batch_size=total_batch, target=dev)
     step_size = dataset.get_dataset_size()
+    if print_per_steps > step_size or print_per_steps < 1:
+        print("Arg: print_per_steps should be less than or equal to dataset size", step_size)
+        print("Change to default: 20")
+        print_per_steps = 20
     # define net
     net = resnet(class_num=1001)
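For the distributed branch: `init()` brings up the communication backend (NCCL on GPU), `get_group_size()` reports how many processes mpirun launched, and `DATA_PARALLEL` with `gradients_mean=True` averages rather than sums gradients after all-reduce. `all_reduce_fusion_config=[85, 160]` splits the parameter list at those indices, so the gradient all-reduce runs as a few fused buckets that can overlap with back-propagation.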
@@ -151,10 +168,10 @@ if __name__ == '__main__':
                   amp_level="O2", keep_batchnorm_fp32=False)

     # define callbacks
-    time_cb = MyTimeMonitor(total_batch)
+    time_cb = MyTimeMonitor(total_batch, print_per_steps)
     loss_cb = LossMonitor()
     cb = [time_cb, loss_cb]

     # train model
     print("========START RESNET50 GPU BENCHMARK========")
-    model.train(epoch_size, dataset, callbacks=cb, sink_size=dataset.get_dataset_size())
+    model.train(int(epoch_size * step_size / print_per_steps), dataset, callbacks=cb, sink_size=print_per_steps)
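The first argument to `model.train` counts sink iterations, so shrinking `sink_size` from one full epoch to `print_per_steps` means the epoch count has to be converted. A sketch of the accounting, assuming the ImageNet-1k size visible in the README log above (`step: 5004` per epoch at batch 256):

```python
# Defaults from this commit plus the dataset size seen in the README log.
epoch_size, step_size, print_per_steps = 2, 5004, 20
sink_iters = int(epoch_size * step_size / print_per_steps)   # 500 sink iterations
total_steps = sink_iters * print_per_steps                   # 10000 of the full 10008 steps
# Equivalent call: model.train(sink_iters, dataset, callbacks=cb, sink_size=print_per_steps)
# MyTimeMonitor now fires every 20 steps; the int() truncation quietly drops the
# last (epoch_size * step_size) % print_per_steps = 8 steps of the run.
```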
@@ -14,9 +14,10 @@
 # limitations under the License.
 # ============================================================================

-if [ $# != 1 ] && [ $# != 2 ]
+if [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ]
 then
-    echo "Usage: sh run_gpu_resnet_benchmark.sh [DATASET_PATH] [BATCH_SIZE](optional)"
+    echo "Usage: sh run_gpu_resnet_benchmark.sh [DATASET_PATH] [BATCH_SIZE](optional) [DEVICE_NUM](optional)"
+    echo "Example: sh run_gpu_resnet_benchmark.sh /path/imagenet/train 256 8"
     exit 1
 fi

@@ -40,3 +41,9 @@ if [ $# == 2 ]
 then
     python ${self_path}/../gpu_resnet_benchmark.py --dataset_path=$DATAPATH --batch_size=$2
 fi
+
+if [ $# == 3 ]
+then
+    mpirun --allow-run-as-root -n $3 python ${self_path}/../gpu_resnet_benchmark.py --run_distribute=True \
+        --dataset_path=$DATAPATH --batch_size=$2
+fi
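With three arguments the device count goes straight to `mpirun -n`, so `$3` copies of the same Python entry point start up and find each other through `init()`. `--batch_size=$2` is per process, so the effective global batch in the 8-GPU example works out to 256 × 8 = 2048 images per step.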