modify for Squeezenet_gpu

郑彬 2021-09-01 15:56:11 +08:00
parent 1518006924
commit a8c9859141
6 changed files with 14 additions and 17 deletions

View File

@@ -198,7 +198,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 10, # dataset class num
"global_batch_size": 32, # the total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num.
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 1e-4, # weight decay
@@ -219,7 +219,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 1000, # dataset class num
"global_batch_size": 256, # the total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 7e-5, # weight decay
@@ -242,7 +242,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 10, # dataset class num
"global_batch_size": 32, # the total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num.
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 1e-4, # weight decay
@@ -263,7 +263,7 @@ Parameters for both training and evaluation can be set in *.yaml
```py
"class_num": 1000, # dataset class num
"global_batch_size": 256, # The total batch_size for training and evaluation
"batch_size": 32, # Batch_size for training, evaluation and export
"loss_scale": 1024, # loss scale
"momentum": 0.9, # momentum
"weight_decay": 7e-5, # weight decay

View File

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_cifar10-120_195.ckpt"
net_name: "suqeezenet"
dataset : "cifar10"
class_num: 10
-global_batch_size: 32
+batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.0001
@@ -55,7 +55,7 @@ load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether enable profiling while training, default: False"
num_classes: "Class for dataset"
global_batch_size: "The total batch_size for training and evaluation"
batch_size: "Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

View File

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_imagenet-200_5004.ckpt"
net_name: "suqeezenet"
dataset : "imagenet"
class_num: 1000
-global_batch_size: 256
+batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.00007
@@ -57,7 +57,7 @@ load_path: 'The location of checkpoint for obs'
device_target: 'Target device type, available: [Ascend, GPU, CPU]'
enable_profiling: 'Whether enable profiling while training, default: False'
num_classes: 'Class for dataset'
-global_batch_size: "The total batch_size for training and evaluation"
+batch_size: "Batch_size for training, evaluation and export"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

View File

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_residual_cifar10-150_195.ckpt"
net_name: "suqeezenet_residual"
dataset : "cifar10"
class_num: 10
-global_batch_size: 32
+batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.0001
@@ -55,7 +55,7 @@ load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether enable profiling while training, default: False"
num_classes: "Class for dataset"
-global_batch_size: "The total batch_size for training and evaluation."
+batch_size: "Batch_size for training, evaluation and export. If running distributed on gpu, divide this value by device_num."
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."

View File

@@ -21,7 +21,7 @@ checkpoint_file_path: "suqeezenet_residual_imagenet-300_5004.ckpt"
net_name: "suqeezenet_residual"
dataset : "imagenet"
class_num: 1000
-global_batch_size: 256
+batch_size: 32
loss_scale: 1024
momentum: 0.9
weight_decay: 0.00007
@@ -57,7 +57,7 @@ load_path: "The location of checkpoint for obs"
device_target: "Target device type, available: [Ascend, GPU, CPU]"
enable_profiling: "Whether enable profiling while training, default: False"
num_classes: "Class for dataset"
-global_batch_size: "The total batch_size for training and evaluation"
+batch_size: "Batch_size for training, evaluation and export"
epoch_size: "Total training epochs."
keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
checkpoint_path: "The location of the checkpoint file."
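All four configuration files receive the same two-line substitution: the `global_batch_size` key and its help text are dropped in favor of a per-device `batch_size`. After editing a config for a multi-GPU launch, a sanity check along these lines can catch a forgotten division; the file name, device count, and use of PyYAML below are assumptions, not part of the repo:

```py
import yaml  # PyYAML, assumed available

DEVICE_NUM = 8      # hypothetical GPU count for the distributed run
GLOBAL_BATCH = 256  # the old global_batch_size for the ImageNet configs

# File name is illustrative; substitute the config actually being edited.
with open("squeezenet_imagenet_config.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["batch_size"] * DEVICE_NUM == GLOBAL_BATCH, (
    "batch_size should be the global batch divided by device_num")
```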

View File

@@ -76,15 +76,12 @@ def train_net():
gradients_mean=True)
ckpt_save_dir = ckpt_save_dir + "/ckpt_" + str(
get_rank()) + "/"
-# obtain the actual batch_size
-if not hasattr(config, "global_batch_size"):
-    raise AttributeError("'config' object has no attribute 'global_batch_size', please check the yaml file.")
-batch_size = max(config.global_batch_size // device_num, 1)
# create dataset
dataset = create_dataset(dataset_path=config.data_path,
do_train=True,
repeat_num=1,
-batch_size=batch_size,
+batch_size=config.batch_size,
target=target)
step_size = dataset.get_dataset_size()
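The train.py hunk removes the in-script derivation: previously `train_net()` required a `global_batch_size` attribute and computed `max(config.global_batch_size // device_num, 1)`; now it passes `config.batch_size` straight to `create_dataset`, so the division happens once in the yaml rather than at every launch. A side-by-side sketch of the two behaviors (function names are mine; the bodies mirror the hunk):

```py
# Before this commit: per-device batch derived from a required global value.
def old_batch(config, device_num: int) -> int:
    if not hasattr(config, "global_batch_size"):
        raise AttributeError(
            "'config' object has no attribute 'global_batch_size', "
            "please check the yaml file.")
    return max(config.global_batch_size // device_num, 1)

# After this commit: the configured value is trusted as-is; for distributed
# GPU runs the user pre-divides it, per the updated README.
def new_batch(config, device_num: int) -> int:
    return config.batch_size
```

The trade-off is simplicity over safety: the `hasattr` guard and the floor at 1 are gone, so a stale config that still defines only `global_batch_size` would presumably fail only when `config.batch_size` is first accessed.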