!16216 alexnet test

From: @huchunmei
Reviewed-by: @oacjiewen,@c_34
Signed-off-by: @c_34
This commit is contained in:
mindspore-ci-bot 2021-05-12 16:02:38 +08:00 committed by Gitee
commit 60d71fc599
17 changed files with 123 additions and 137 deletions

View File

@ -11,9 +11,8 @@ checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
device_target: Ascend
enable_profiling: False
data_path_local: '/data/hcm/data/ImageNet_Original/'
ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
ckpt_path: "/cache/data"
ckpt_file: "/cache/data/checkpoint_alexnet-30_1562.ckpt"
# ==============================================================================
# Training options
num_classes: 1000

View File

@ -11,8 +11,8 @@ checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
device_target: Ascend
enable_profiling: False
data_path_local: '/data/hcm/data/cifar-10-batches-bin/'
ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
ckpt_path: "/cache/data"
ckpt_file: "/cache/data/checkpoint_alexnet-30_1562.ckpt"
# ==============================================================================
# Training options
epoch_size: 30

View File

@ -18,15 +18,12 @@ eval alexnet according to model file:
python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
"""
import os
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.moxing_adapter import moxing_wrapper
from utils.device_adapter import get_device_id, get_device_num
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id, get_device_num
from src.dataset import create_dataset_cifar10, create_dataset_imagenet
from src.alexnet import AlexNet
import mindspore.nn as nn
from mindspore import context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
@ -35,14 +32,8 @@ from mindspore.nn.metrics import Accuracy
from mindspore.communication.management import init
if os.path.exists(config.data_path_local):
config.data_path = config.data_path_local
load_path = config.ckpt_path_local
else:
load_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')
def modelarts_process():
pass
config.ckpt_path = config.ckpt_file
@moxing_wrapper(pre_process=modelarts_process)
def eval_alexnet():
@ -64,8 +55,8 @@ def eval_alexnet():
opt = nn.Momentum(network.trainable_params(), config.learning_rate, config.momentum)
ds_eval = create_dataset_cifar10(config.data_path, config.batch_size, status="test", \
target=config.device_target)
param_dict = load_checkpoint(load_path)
print("load checkpoint from [{}].".format(load_path))
param_dict = load_checkpoint(config.ckpt_path)
print("load checkpoint from [{}].".format(config.ckpt_path))
load_param_into_net(network, param_dict)
network.set_train(False)
model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
@ -74,8 +65,8 @@ def eval_alexnet():
network = AlexNet(config.num_classes, phase='test')
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
ds_eval = create_dataset_imagenet(config.data_path, config.batch_size, training=False)
param_dict = load_checkpoint(load_path)
print("load checkpoint from [{}].".format(load_path))
param_dict = load_checkpoint(config.ckpt_path)
print("load checkpoint from [{}].".format(config.ckpt_path))
load_param_into_net(network, param_dict)
network.set_train(False)
model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})

View File

@ -17,22 +17,14 @@
python export.py
"""
import os
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from src.model_utils.config import config
from src.alexnet import AlexNet
import numpy as np
import mindspore as ms
from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export
from src.alexnet import AlexNet
if os.path.exists(config.data_path_local):
ckpt_path = config.ckpt_path_local
else:
ckpt_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
if config.device_target == "Ascend":
context.set_context(device_id=config.device_id)
@ -40,7 +32,7 @@ if config.device_target == "Ascend":
if __name__ == '__main__':
net = AlexNet(num_classes=config.num_classes)
param_dict = load_checkpoint(ckpt_path)
param_dict = load_checkpoint(config.ckpt_file)
load_param_into_net(net, param_dict)
input_arr = Tensor(np.zeros([config.batch_size, 3, config.image_height, config.image_width]), ms.float32)

View File

@ -14,9 +14,9 @@
# limitations under the License.
# ============================================================================
# A simple tutorial is as follows; more parameters can be set.
if [ $# != 3 ]
if [ $# != 4 ]
then
echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [cifar10|imagenet] [DATA_PATH]"
echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [cifar10|imagenet] [DATA_PATH] [CKPT_PATH]"
exit 1
fi
@ -26,6 +26,20 @@ then
exit 1
fi
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
if [ $# -ge 1 ]; then
if [ $2 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $2 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
@ -33,6 +47,7 @@ RANK_TABLE_FILE=$(realpath $1)
export RANK_TABLE_FILE
export DATASET_NAME=$2
export DATA_PATH=$3
export CKPT_PATH=$4
echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}"
export SERVER_ID=0
@ -43,11 +58,12 @@ do
export RANK_ID=$((rank_start + i))
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp -r ./src ./train_parallel$i
cp ./train.py ./train_parallel$i
cp -r ../src ./train_parallel$i
cp ../train.py ./train_parallel$i
echo "start training for rank $RANK_ID, device $DEVICE_ID"
cd ./train_parallel$i ||exit
env > env.log
python train.py --device_id=$i --dataset_name=$DATASET_NAME --data_path=$DATA_PATH > log 2>&1 &
python ../../train.py --config_path=$CONFIG_FILE --device_id=$i --dataset_name=$DATASET_NAME \
--data_path=$DATA_PATH --ckpt_path=$CKPT_PATH > log 2>&1 &
cd ..
done

View File

@ -1,35 +0,0 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# A simple tutorial is as follows; more parameters can be set.
# echo "Usage: sh run_standalone_eval_ascend.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH] [DEVICE_ID]"
#
# Purpose: start ../eval.py in the background with a YAML config file chosen
# from the first positional argument; stdout and stderr go to ./eval_log.
# Only $1 is read by this script; the other arguments named in the usage line
# above are not forwarded to eval.py.
#
# Directory that contains this script, resolved to an absolute path via cd + pwd.
# NOTE(review): the leading "./" presumably assumes $0 is a relative path - confirm.
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
# Pick the config file from $1: 'imagenet' -> config_imagenet.yaml,
# 'cifar10' -> default_config.yaml; any other value prints an error and exits 1.
if [ $# -ge 1 ]; then
if [ $1 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $1 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
# No arguments at all: use default_config.yaml.
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
# Commented-out invocation that passes every option explicitly on the command line:
# python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
# "../eval.py" is resolved against the current working directory (not BASE_PATH);
# the trailing "&" runs the evaluation as a background job.
python ../eval.py --config_path=$CONFIG_FILE > eval_log 2>&1 &

View File

@ -25,5 +25,22 @@ export DATA_PATH=$2
export CKPT_PATH=$3
export DEVICE_ID=$4
python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
--device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
if [ $# -ge 1 ]; then
if [ $1 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $1 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
python ../eval.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME \
--data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
--device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &

View File

@ -25,5 +25,21 @@ export DATA_PATH=$2
export CKPT_PATH=$3
export DEVICE_ID=$4
python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
--device_id=$DEVICE_ID --device_target="GPU" > eval_log 2>&1 &
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
if [ $# -ge 1 ]; then
if [ $1 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $1 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
python ../eval.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME \
--data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
--device_id=$DEVICE_ID --device_target="GPU" > eval_log 2>&1 &

View File

@ -14,15 +14,32 @@
# limitations under the License.
# ============================================================================
# A simple tutorial is as follows; more parameters can be set.
if [ $# != 3 ]
if [ $# != 4 ]
then
echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID] [CKPT_PATH]"
exit 1
fi
export DATASET_NAME=$1
export DATA_PATH=$2
export DEVICE_ID=$3
export CKPT_PATH=$4
python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
--device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
if [ $# -ge 1 ]; then
if [ $1 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $1 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
python ../train.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
--ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &

View File

@ -14,14 +14,30 @@
# limitations under the License.
# ============================================================================
# A simple tutorial is as follows; more parameters can be set.
if [ $# != 2 ]
if [ $# != 3 ]
then
echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH]"
echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH]"
exit 1
fi
export DATASET_NAME=$1
export DATA_PATH=$2
export CKPT_PATH=$3
python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
--device_target="GPU" > log 2>&1 &
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
if [ $# -ge 1 ]; then
if [ $1 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $1 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
python ../train.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
--ckpt_path=$CKPT_PATH --device_target="GPU" > log 2>&1 &

View File

@ -1,35 +0,0 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# A simple tutorial is as follows; more parameters can be set.
# echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
#
# Purpose: start ../train.py in the background with a YAML config file chosen
# from the first positional argument; stdout and stderr go to ./log.
# Only $1 is read by this script; the other arguments named in the usage line
# above are not forwarded to train.py.
#
# Directory that contains this script, resolved to an absolute path via cd + pwd.
# NOTE(review): the leading "./" presumably assumes $0 is a relative path - confirm.
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
# Pick the config file from $1: 'imagenet' -> config_imagenet.yaml,
# 'cifar10' -> default_config.yaml; any other value prints an error and exits 1.
if [ $# -ge 1 ]; then
if [ $1 == 'imagenet' ]; then
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
elif [ $1 == 'cifar10' ]; then
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
else
echo "Unrecognized parameter"
exit 1
fi
else
# No arguments at all: use default_config.yaml.
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
fi
# Commented-out invocation that passes every option explicitly on the command line:
# python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
# "../train.py" is resolved against the current working directory (not BASE_PATH);
# the trailing "&" runs the training as a background job.
python ../train.py --config_path=$CONFIG_FILE > log 2>&1 &

View File

@ -115,7 +115,7 @@ def get_config():
"""
parser = argparse.ArgumentParser(description="default name", add_help=False)
current_dir = os.path.dirname(os.path.abspath(__file__))
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../default_config.yaml"),
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../../default_config.yaml"),
help="Config file path")
path_args, _ = parser.parse_known_args()
default, helper, choices = parse_yaml(path_args.config_path)

View File

@ -19,17 +19,14 @@ python train.py --data_path /YourDataPath
"""
import os
# import sys
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
from utils.config import config
from utils.moxing_adapter import moxing_wrapper
from utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
# from src.config import alexnet_cifar10_config, alexnet_imagenet_config
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
from src.dataset import create_dataset_cifar10, create_dataset_imagenet
from src.generator_lr import get_lr_cifar10, get_lr_imagenet
from src.alexnet import AlexNet
from src.get_param_groups import get_param_groups
import mindspore.nn as nn
from mindspore.communication.management import init, get_rank
from mindspore import dataset as de
@ -44,14 +41,9 @@ from mindspore.common import set_seed
set_seed(1)
de.config.set_seed(1)
if os.path.exists(config.data_path_local):
config.data_path = config.data_path_local
config.checkpoint_path = os.path.join(config.checkpoint_path, str(get_rank_id()))
else:
config.checkpoint_path = os.path.join(config.output_path, config.checkpoint_path, str(get_rank_id()))
def modelarts_pre_process():
pass
# config.ckpt_path = os.path.join(config.output_path, str(get_rank_id()), config.checkpoint_path)
@moxing_wrapper(pre_process=modelarts_pre_process)
def train_alexnet():
@ -135,9 +127,9 @@ def train_alexnet():
raise ValueError("Unsupported platform.")
if device_num > 1:
ckpt_save_dir = os.path.join(config.checkpoint_path + "_" + str(get_rank()))
ckpt_save_dir = os.path.join(config.ckpt_path + "_" + str(get_rank()))
else:
ckpt_save_dir = config.checkpoint_path
ckpt_save_dir = config.ckpt_path
time_cb = TimeMonitor(data_size=step_per_epoch)
config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,