modify for Squeezenet_gpu
commit a8c9859141 (parent 1518006924)

@@ -198,7 +198,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 10, # dataset class num
"global_batch_size": 32, # the total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num.
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 1e-4, # weight decay
```

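The comment on `batch_size` above implies a fixed relationship between the three values: on a distributed GPU run, each device should get `global_batch_size / device_num` samples per step. A minimal sketch of that arithmetic (the `device_num` of 4 is a hypothetical example, not a value from the config; the `max(..., 1)` guard mirrors the expression train.py uses further down):

```py
# Hypothetical example: derive the per-device batch_size that the yaml
# should contain when running distributed on GPU.
global_batch_size = 32  # "global_batch_size" from the config above
device_num = 4          # assumed number of GPUs in the job

# Split the global batch across devices, never dropping below 1.
per_device_batch_size = max(global_batch_size // device_num, 1)
print(per_device_batch_size)  # 8 -> set this as "batch_size" in the yaml
```
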
@@ -219,7 +219,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 1000, # dataset class num
"global_batch_size": 256, # the total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 7e-5, # weight decay
```

@@ -242,7 +242,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 10, # dataset class num
"global_batch_size": 32, # the total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num.
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 1e-4, # weight decay
```

@@ -263,7 +263,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 1000, # dataset class num
"global_batch_size": 256, # The total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 7e-5, # weight decay
```

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_cifar10-120_195.ckpt"
net_name: "suqeezenet"
dataset : "cifar10"
class_num: 10
global_batch_size: 32
batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.0001

@@ -55,7 +55,7 @@ load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether enable profiling while training, default: False"
num_classes: "Class for dataset"
global_batch_size: "The total batch_size for training and evaluation"
batch_size: "Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

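Since `batch_size` has to be kept in sync with `global_batch_size` by hand on GPU, a small sanity check is easy to bolt on. This sketch is not part of the repo; the helper name, the example file name, and the check itself are illustrative only:

```py
# Illustrative helper (not in the repo): verify that the per-device
# batch_size in a config file follows the documented rule
# batch_size == global_batch_size / device_num.
import yaml

def check_batch_config(config_path, device_num):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    expected = max(cfg["global_batch_size"] // device_num, 1)
    if cfg["batch_size"] != expected:
        raise ValueError(
            f"batch_size is {cfg['batch_size']}, expected {expected} for "
            f"global_batch_size {cfg['global_batch_size']} on "
            f"{device_num} device(s)")

# e.g. check_batch_config("squeezenet_cifar10_config.yaml", device_num=4)
```
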
@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_imagenet-200_5004.ckpt"
net_name: "suqeezenet"
dataset : "imagenet"
class_num: 1000
global_batch_size: 256
batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.00007

@@ -57,7 +57,7 @@ load_path: 'The location of checkpoint for obs'
device_target: 'Target device type, available: [Ascend, GPU, CPU]'
enable_profiling: 'Whether enable profiling while training, default: False'
num_classes: 'Class for dataset'
global_batch_size: "The total batch_size for training and evaluation"
batch_size: "Batch_size for training, evaluation and export"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_residual_cifar10-150_195.ckpt"
net_name: "suqeezenet_residual"
dataset : "cifar10"
class_num: 10
global_batch_size: 32
batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.0001

@@ -55,7 +55,7 @@ load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether enable profiling while training, default: False"
num_classes: "Class for dataset"
global_batch_size: "The total batch_size for training and evaluation."
batch_size: "Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num."
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_residual_imagenet-300_5004.ckpt"
net_name: "suqeezenet_residual"
dataset : "imagenet"
class_num: 1000
global_batch_size: 256
batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.00007

@@ -57,7 +57,7 @@ load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether enable profiling while training, default: False"
num_classes: "Class for dataset"
global_batch_size: "The total batch_size for training and evaluation"
batch_size: "Batch_size for training, evaluation and export"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

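The `momentum`, `weight_decay`, and `loss_scale` keys that recur in these configs are standard MindSpore optimizer inputs. As a rough sketch of how such values are typically consumed (assumed wiring for illustration; the repo's actual train.py may differ):

```py
# Sketch (assumed wiring, not copied from train.py): feed the yaml
# hyperparameters into a MindSpore Momentum optimizer with a fixed
# loss scale, as the loss_scale/momentum/weight_decay keys suggest.
import mindspore.nn as nn
from mindspore.train.loss_scale_manager import FixedLossScaleManager

def build_optimizer(net, lr):
    opt = nn.Momentum(net.trainable_params(),
                      learning_rate=lr,
                      momentum=0.9,       # "momentum" from the config
                      weight_decay=7e-5,  # "weight_decay" from the config
                      loss_scale=1024)    # "loss_scale" from the config
    # Keep the loss-side scale fixed at the same value as the optimizer's.
    manager = FixedLossScaleManager(1024, drop_overflow_update=False)
    return opt, manager
```
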
@@ -76,15 +76,12 @@ def train_net():
                            gradients_mean=True)
         ckpt_save_dir = ckpt_save_dir + "/ckpt_" + str(
             get_rank()) + "/"
-    # obtain the actual batch_size
-    if not hasattr(config, "global_batch_size"):
-        raise AttributeError("'config' object has no attribute 'global_batch_size', please check the yaml file.")
-    batch_size = max(config.global_batch_size // device_num, 1)
 
     # create dataset
     dataset = create_dataset(dataset_path=config.data_path,
                              do_train=True,
                              repeat_num=1,
-                             batch_size=batch_size,
+                             batch_size=config.batch_size,
                              target=target)
     step_size = dataset.get_dataset_size()

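The hunk above still calls `get_rank()` for the checkpoint directory, and the `device_num` used elsewhere in the script comes from the same communication module. A minimal sketch of that setup (`init`, `get_rank`, and `get_group_size` are standard MindSpore APIs, but where exactly train.py calls them is assumed here, not shown in the diff):

```py
# Minimal sketch of the distributed setup the hunk above relies on.
from mindspore.communication.management import init, get_rank, get_group_size

init()                         # start the backend (NCCL on GPU, HCCL on Ascend)
device_num = get_group_size()  # total devices participating in the job
rank = get_rank()              # this process's rank; used in ckpt_save_dir
```
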