!16216 alexnet test

From: @huchunmei Reviewed-by: @oacjiewen,@c_34 Signed-off-by: @c_34
2021-05-12 16:02:38 +08:00 · 2021-05-12 16:02:38 +08:00 · 60d71fc599
parent c8ef2924a9 c1b91ff791
commit 60d71fc599
17 changed files with 123 additions and 137 deletions
--- a/model_zoo/official/cv/alexnet/config_imagenet.yaml
+++ b/model_zoo/official/cv/alexnet/config_imagenet.yaml
@@ -11,9 +11,8 @@ checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
 device_target: Ascend
 enable_profiling: False

-data_path_local: '/data/hcm/data/ImageNet_Original/'
-ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
-
+ckpt_path: "/cache/data"
+ckpt_file: "/cache/data/checkpoint_alexnet-30_1562.ckpt"
 # ==============================================================================
 # Training options
 num_classes: 1000
--- a/model_zoo/official/cv/alexnet/default_config.yaml
+++ b/model_zoo/official/cv/alexnet/default_config.yaml
@@ -11,8 +11,8 @@ checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
 device_target: Ascend
 enable_profiling: False

-data_path_local: '/data/hcm/data/cifar-10-batches-bin/'
-ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
+ckpt_path: "/cache/data"
+ckpt_file: "/cache/data/checkpoint_alexnet-30_1562.ckpt"
 # ==============================================================================
 # Training options
 epoch_size: 30
--- a/model_zoo/official/cv/alexnet/eval.py
+++ b/model_zoo/official/cv/alexnet/eval.py
@@ -18,15 +18,12 @@ eval alexnet according to model file:
 python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
 """

-import os
-# import sys
-# sys.path.append(os.path.join(os.getcwd(), 'utils'))
-from utils.config import config
-from utils.moxing_adapter import moxing_wrapper
-from utils.device_adapter import get_device_id, get_device_num
-
+from src.model_utils.config import config
+from src.model_utils.moxing_adapter import moxing_wrapper
+from src.model_utils.device_adapter import get_device_id, get_device_num
 from src.dataset import create_dataset_cifar10, create_dataset_imagenet
 from src.alexnet import AlexNet
+
 import mindspore.nn as nn
 from mindspore import context
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
@@ -35,14 +32,8 @@ from mindspore.nn.metrics import Accuracy
 from mindspore.communication.management import init


-if os.path.exists(config.data_path_local):
-    config.data_path = config.data_path_local
-    load_path = config.ckpt_path_local
-else:
-    load_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')
-
 def modelarts_process():
-    pass
+    config.ckpt_path = config.ckpt_file

 @moxing_wrapper(pre_process=modelarts_process)
 def eval_alexnet():
@@ -64,8 +55,8 @@ def eval_alexnet():
        opt = nn.Momentum(network.trainable_params(), config.learning_rate, config.momentum)
        ds_eval = create_dataset_cifar10(config.data_path, config.batch_size, status="test", \
            target=config.device_target)
-        param_dict = load_checkpoint(load_path)
-        print("load checkpoint from [{}].".format(load_path))
+        param_dict = load_checkpoint(config.ckpt_path)
+        print("load checkpoint from [{}].".format(config.ckpt_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
@@ -74,8 +65,8 @@ def eval_alexnet():
        network = AlexNet(config.num_classes, phase='test')
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        ds_eval = create_dataset_imagenet(config.data_path, config.batch_size, training=False)
-        param_dict = load_checkpoint(load_path)
-        print("load checkpoint from [{}].".format(load_path))
+        param_dict = load_checkpoint(config.ckpt_path)
+        print("load checkpoint from [{}].".format(config.ckpt_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
--- a/model_zoo/official/cv/alexnet/export.py
+++ b/model_zoo/official/cv/alexnet/export.py
@@ -17,22 +17,14 @@
 python export.py
 """

-import os
-# import sys
-# sys.path.append(os.path.join(os.getcwd(), 'utils'))
-from utils.config import config
+from src.model_utils.config import config
+from src.alexnet import AlexNet

 import numpy as np
 import mindspore as ms
 from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export
-from src.alexnet import AlexNet


-if os.path.exists(config.data_path_local):
-    ckpt_path = config.ckpt_path_local
-else:
-    ckpt_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')
-
 context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
 if config.device_target == "Ascend":
    context.set_context(device_id=config.device_id)
@@ -40,7 +32,7 @@ if config.device_target == "Ascend":
 if __name__ == '__main__':
    net = AlexNet(num_classes=config.num_classes)

-    param_dict = load_checkpoint(ckpt_path)
+    param_dict = load_checkpoint(config.ckpt_file)
    load_param_into_net(net, param_dict)

    input_arr = Tensor(np.zeros([config.batch_size, 3, config.image_height, config.image_width]), ms.float32)
--- a/model_zoo/official/cv/alexnet/scripts/run_distribution_ascend.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_distribution_ascend.sh
@@ -14,9 +14,9 @@
 # limitations under the License.
 # ============================================================================
 # an simple tutorial as follows, more parameters can be setting
-if [ $# != 3 ]
+if [ $# != 4 ]
 then
-    echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [cifar10|imagenet] [DATA_PATH]"
+    echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [cifar10|imagenet] [DATA_PATH] [CKPT_PATH]"
 exit 1
 fi

@@ -26,6 +26,20 @@ then
 exit 1
 fi

+BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
+if [ $# -ge 1 ]; then
+  if [ $2 == 'imagenet' ]; then
+    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
+  elif [ $2 == 'cifar10' ]; then
+    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+  else
+    echo "Unrecognized parameter"
+    exit 1
+  fi
+else
+  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+fi
+
 ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
@@ -33,6 +47,7 @@ RANK_TABLE_FILE=$(realpath $1)
 export RANK_TABLE_FILE
 export DATASET_NAME=$2
 export DATA_PATH=$3
+export CKPT_PATH=$4
 echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}"

 export SERVER_ID=0
@@ -43,11 +58,12 @@ do
    export RANK_ID=$((rank_start + i))
    rm -rf ./train_parallel$i
    mkdir ./train_parallel$i
-    cp -r ./src ./train_parallel$i
-    cp ./train.py ./train_parallel$i
+    cp -r ../src ./train_parallel$i
+    cp ../train.py ./train_parallel$i
    echo "start training for rank $RANK_ID, device $DEVICE_ID"
    cd ./train_parallel$i ||exit
    env > env.log
-    python train.py --device_id=$i --dataset_name=$DATASET_NAME --data_path=$DATA_PATH > log 2>&1 &
+    python ../../train.py --config_path=$CONFIG_FILE --device_id=$i --dataset_name=$DATASET_NAME \
+    --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH > log 2>&1 &
    cd ..
 done
--- a/model_zoo/official/cv/alexnet/scripts/run_eval_standalone_ascend.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_eval_standalone_ascend.sh
@@ -1,35 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-# an simple tutorial as follows, more parameters can be setting
-# echo "Usage: sh run_standalone_eval_ascend.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH] [DEVICE_ID]"
-
-BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
-
-if [ $# -ge 1 ]; then
-  if [ $1 == 'imagenet' ]; then
-    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
-  elif [ $1 == 'cifar10' ]; then
-    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
-  else
-    echo "Unrecognized parameter"
-    exit 1
-  fi
-else
-  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
-fi
-
-# python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
-python ../eval.py --config_path=$CONFIG_FILE > eval_log 2>&1 &
--- a/model_zoo/official/cv/alexnet/scripts/run_standalone_eval_ascend.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_standalone_eval_ascend.sh
@@ -25,5 +25,22 @@ export DATA_PATH=$2
 export CKPT_PATH=$3
 export DEVICE_ID=$4

-python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
-               --device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
+BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
+
+if [ $# -ge 1 ]; then
+  if [ $1 == 'imagenet' ]; then
+    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
+  elif [ $1 == 'cifar10' ]; then
+    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+  else
+    echo "Unrecognized parameter"
+    exit 1
+  fi
+else
+  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+fi
+
+python ../eval.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME \
+    --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
+    --device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
+    
--- a/model_zoo/official/cv/alexnet/scripts/run_standalone_eval_gpu.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_standalone_eval_gpu.sh
@@ -25,5 +25,21 @@ export DATA_PATH=$2
 export CKPT_PATH=$3
 export DEVICE_ID=$4

-python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
-               --device_id=$DEVICE_ID --device_target="GPU" > eval_log 2>&1 &
+BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
+
+if [ $# -ge 1 ]; then
+  if [ $1 == 'imagenet' ]; then
+    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
+  elif [ $1 == 'cifar10' ]; then
+    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+  else
+    echo "Unrecognized parameter"
+    exit 1
+  fi
+else
+  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+fi
+
+python ../eval.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME \
+    --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
+    --device_id=$DEVICE_ID --device_target="GPU" > eval_log 2>&1 &
--- a/model_zoo/official/cv/alexnet/scripts/run_standalone_train_ascend.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_standalone_train_ascend.sh
@@ -14,15 +14,32 @@
 # limitations under the License.
 # ============================================================================
 # an simple tutorial as follows, more parameters can be setting
-if [ $# != 3 ]
+if [ $# != 4 ]
 then
-    echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
+    echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID] [CKPT_PATH]"
 exit 1
 fi

 export DATASET_NAME=$1
 export DATA_PATH=$2
 export DEVICE_ID=$3
+export CKPT_PATH=$4

-python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
-               --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
+BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
+
+if [ $# -ge 1 ]; then
+  if [ $1 == 'imagenet' ]; then
+    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
+  elif [ $1 == 'cifar10' ]; then
+    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+  else
+    echo "Unrecognized parameter"
+    exit 1
+  fi
+else
+  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+fi
+
+python ../train.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
+--ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
+               
--- a/model_zoo/official/cv/alexnet/scripts/run_standalone_train_gpu.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_standalone_train_gpu.sh
@@ -14,14 +14,30 @@
 # limitations under the License.
 # ============================================================================
 # an simple tutorial as follows, more parameters can be setting
-if [ $# != 2 ]
+if [ $# != 3 ]
 then
-    echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH]"
+    echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH]"
 exit 1
 fi

 export DATASET_NAME=$1
 export DATA_PATH=$2
+export CKPT_PATH=$3

-python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
-               --device_target="GPU" > log 2>&1 &
+BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
+
+if [ $# -ge 1 ]; then
+  if [ $1 == 'imagenet' ]; then
+    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
+  elif [ $1 == 'cifar10' ]; then
+    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+  else
+    echo "Unrecognized parameter"
+    exit 1
+  fi
+else
+  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
+fi
+
+python ../train.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
+--ckpt_path=$CKPT_PATH --device_target="GPU" > log 2>&1 &
--- a/model_zoo/official/cv/alexnet/scripts/run_train_standalone_ascend.sh
+++ b/model_zoo/official/cv/alexnet/scripts/run_train_standalone_ascend.sh
@@ -1,35 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-# an simple tutorial as follows, more parameters can be setting
-# echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
-
-BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
-
-if [ $# -ge 1 ]; then
-  if [ $1 == 'imagenet' ]; then
-    CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
-  elif [ $1 == 'cifar10' ]; then
-    CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
-  else
-    echo "Unrecognized parameter"
-    exit 1
-  fi
-else
-  CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
-fi
-
-# python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
-python ../train.py --config_path=$CONFIG_FILE > log 2>&1 &
--- a/model_zoo/official/cv/alexnet/src/model_utils/__init__.py
+++ b/model_zoo/official/cv/alexnet/src/model_utils/__init__.py
--- a/model_zoo/official/cv/alexnet/src/model_utils/config.py
+++ b/model_zoo/official/cv/alexnet/src/model_utils/config.py
@@ -115,7 +115,7 @@ def get_config():
    """
    parser = argparse.ArgumentParser(description="default name", add_help=False)
    current_dir = os.path.dirname(os.path.abspath(__file__))
-    parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../default_config.yaml"),
+    parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../../default_config.yaml"),
                        help="Config file path")
    path_args, _ = parser.parse_known_args()
    default, helper, choices = parse_yaml(path_args.config_path)
--- a/model_zoo/official/cv/alexnet/src/model_utils/device_adapter.py
+++ b/model_zoo/official/cv/alexnet/src/model_utils/device_adapter.py
--- a/model_zoo/official/cv/alexnet/src/model_utils/local_adapter.py
+++ b/model_zoo/official/cv/alexnet/src/model_utils/local_adapter.py
--- a/model_zoo/official/cv/alexnet/src/model_utils/moxing_adapter.py
+++ b/model_zoo/official/cv/alexnet/src/model_utils/moxing_adapter.py
--- a/model_zoo/official/cv/alexnet/train.py
+++ b/model_zoo/official/cv/alexnet/train.py
@@ -19,17 +19,14 @@ python train.py --data_path /YourDataPath
 """

 import os
-# import sys
-# sys.path.append(os.path.join(os.getcwd(), 'utils'))
-from utils.config import config
-from utils.moxing_adapter import moxing_wrapper
-from utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
-
-# from src.config import alexnet_cifar10_config, alexnet_imagenet_config
+from src.model_utils.config import config
+from src.model_utils.moxing_adapter import moxing_wrapper
+from src.model_utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
 from src.dataset import create_dataset_cifar10, create_dataset_imagenet
 from src.generator_lr import get_lr_cifar10, get_lr_imagenet
 from src.alexnet import AlexNet
 from src.get_param_groups import get_param_groups
+
 import mindspore.nn as nn
 from mindspore.communication.management import init, get_rank
 from mindspore import dataset as de
@@ -44,14 +41,9 @@ from mindspore.common import set_seed
 set_seed(1)
 de.config.set_seed(1)

-if os.path.exists(config.data_path_local):
-    config.data_path = config.data_path_local
-    config.checkpoint_path = os.path.join(config.checkpoint_path, str(get_rank_id()))
-else:
-    config.checkpoint_path = os.path.join(config.output_path, config.checkpoint_path, str(get_rank_id()))
-
 def modelarts_pre_process():
    pass
+    # config.ckpt_path = os.path.join(config.output_path, str(get_rank_id()), config.checkpoint_path)

 @moxing_wrapper(pre_process=modelarts_pre_process)
 def train_alexnet():
@@ -135,9 +127,9 @@ def train_alexnet():
        raise ValueError("Unsupported platform.")

    if device_num > 1:
-        ckpt_save_dir = os.path.join(config.checkpoint_path + "_" + str(get_rank()))
+        ckpt_save_dir = os.path.join(config.ckpt_path + "_" + str(get_rank()))
    else:
-        ckpt_save_dir = config.checkpoint_path
+        ckpt_save_dir = config.ckpt_path

    time_cb = TimeMonitor(data_size=step_per_epoch)
    config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,