forked from mindspore-Ecosystem/mindspore
!16216 alexnet test
From: @huchunmei Reviewed-by: @oacjiewen,@c_34 Signed-off-by: @c_34
This commit is contained in:
commit
60d71fc599
|
@ -11,9 +11,8 @@ checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
|
|||
device_target: Ascend
|
||||
enable_profiling: False
|
||||
|
||||
data_path_local: '/data/hcm/data/ImageNet_Original/'
|
||||
ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
|
||||
|
||||
ckpt_path: "/cache/data"
|
||||
ckpt_file: "/cache/data/checkpoint_alexnet-30_1562.ckpt"
|
||||
# ==============================================================================
|
||||
# Training options
|
||||
num_classes: 1000
|
||||
|
|
|
@ -11,8 +11,8 @@ checkpoint_file: './checkpoint/checkpoint_alexnet-30_1562.ckpt'
|
|||
device_target: Ascend
|
||||
enable_profiling: False
|
||||
|
||||
data_path_local: '/data/hcm/data/cifar-10-batches-bin/'
|
||||
ckpt_path_local: '/data/hcm/data/ckpt_alexnet/checkpoint_alexnet-30_1562.ckpt'
|
||||
ckpt_path: "/cache/data"
|
||||
ckpt_file: "/cache/data/checkpoint_alexnet-30_1562.ckpt"
|
||||
# ==============================================================================
|
||||
# Training options
|
||||
epoch_size: 30
|
||||
|
|
|
@ -18,15 +18,12 @@ eval alexnet according to model file:
|
|||
python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
|
||||
"""
|
||||
|
||||
import os
|
||||
# import sys
|
||||
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
|
||||
from utils.config import config
|
||||
from utils.moxing_adapter import moxing_wrapper
|
||||
from utils.device_adapter import get_device_id, get_device_num
|
||||
|
||||
from src.model_utils.config import config
|
||||
from src.model_utils.moxing_adapter import moxing_wrapper
|
||||
from src.model_utils.device_adapter import get_device_id, get_device_num
|
||||
from src.dataset import create_dataset_cifar10, create_dataset_imagenet
|
||||
from src.alexnet import AlexNet
|
||||
|
||||
import mindspore.nn as nn
|
||||
from mindspore import context
|
||||
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||
|
@ -35,14 +32,8 @@ from mindspore.nn.metrics import Accuracy
|
|||
from mindspore.communication.management import init
|
||||
|
||||
|
||||
if os.path.exists(config.data_path_local):
|
||||
config.data_path = config.data_path_local
|
||||
load_path = config.ckpt_path_local
|
||||
else:
|
||||
load_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')
|
||||
|
||||
def modelarts_process():
|
||||
pass
|
||||
config.ckpt_path = config.ckpt_file
|
||||
|
||||
@moxing_wrapper(pre_process=modelarts_process)
|
||||
def eval_alexnet():
|
||||
|
@ -64,8 +55,8 @@ def eval_alexnet():
|
|||
opt = nn.Momentum(network.trainable_params(), config.learning_rate, config.momentum)
|
||||
ds_eval = create_dataset_cifar10(config.data_path, config.batch_size, status="test", \
|
||||
target=config.device_target)
|
||||
param_dict = load_checkpoint(load_path)
|
||||
print("load checkpoint from [{}].".format(load_path))
|
||||
param_dict = load_checkpoint(config.ckpt_path)
|
||||
print("load checkpoint from [{}].".format(config.ckpt_path))
|
||||
load_param_into_net(network, param_dict)
|
||||
network.set_train(False)
|
||||
model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
|
||||
|
@ -74,8 +65,8 @@ def eval_alexnet():
|
|||
network = AlexNet(config.num_classes, phase='test')
|
||||
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
|
||||
ds_eval = create_dataset_imagenet(config.data_path, config.batch_size, training=False)
|
||||
param_dict = load_checkpoint(load_path)
|
||||
print("load checkpoint from [{}].".format(load_path))
|
||||
param_dict = load_checkpoint(config.ckpt_path)
|
||||
print("load checkpoint from [{}].".format(config.ckpt_path))
|
||||
load_param_into_net(network, param_dict)
|
||||
network.set_train(False)
|
||||
model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
|
||||
|
|
|
@ -17,22 +17,14 @@
|
|||
python export.py
|
||||
"""
|
||||
|
||||
import os
|
||||
# import sys
|
||||
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
|
||||
from utils.config import config
|
||||
from src.model_utils.config import config
|
||||
from src.alexnet import AlexNet
|
||||
|
||||
import numpy as np
|
||||
import mindspore as ms
|
||||
from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export
|
||||
from src.alexnet import AlexNet
|
||||
|
||||
|
||||
if os.path.exists(config.data_path_local):
|
||||
ckpt_path = config.ckpt_path_local
|
||||
else:
|
||||
ckpt_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt')
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
|
||||
if config.device_target == "Ascend":
|
||||
context.set_context(device_id=config.device_id)
|
||||
|
@ -40,7 +32,7 @@ if config.device_target == "Ascend":
|
|||
if __name__ == '__main__':
|
||||
net = AlexNet(num_classes=config.num_classes)
|
||||
|
||||
param_dict = load_checkpoint(ckpt_path)
|
||||
param_dict = load_checkpoint(config.ckpt_file)
|
||||
load_param_into_net(net, param_dict)
|
||||
|
||||
input_arr = Tensor(np.zeros([config.batch_size, 3, config.image_height, config.image_width]), ms.float32)
|
||||
|
|
|
@ -14,9 +14,9 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# A simple tutorial follows; more parameters can be set.
|
||||
if [ $# != 3 ]
|
||||
if [ $# != 4 ]
|
||||
then
|
||||
echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [cifar10|imagenet] [DATA_PATH]"
|
||||
echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [cifar10|imagenet] [DATA_PATH] [CKPT_PATH]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
@ -26,6 +26,20 @@ then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $2 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $2 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
ulimit -u unlimited
|
||||
export DEVICE_NUM=8
|
||||
export RANK_SIZE=8
|
||||
|
@ -33,6 +47,7 @@ RANK_TABLE_FILE=$(realpath $1)
|
|||
export RANK_TABLE_FILE
|
||||
export DATASET_NAME=$2
|
||||
export DATA_PATH=$3
|
||||
export CKPT_PATH=$4
|
||||
echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}"
|
||||
|
||||
export SERVER_ID=0
|
||||
|
@ -43,11 +58,12 @@ do
|
|||
export RANK_ID=$((rank_start + i))
|
||||
rm -rf ./train_parallel$i
|
||||
mkdir ./train_parallel$i
|
||||
cp -r ./src ./train_parallel$i
|
||||
cp ./train.py ./train_parallel$i
|
||||
cp -r ../src ./train_parallel$i
|
||||
cp ../train.py ./train_parallel$i
|
||||
echo "start training for rank $RANK_ID, device $DEVICE_ID"
|
||||
cd ./train_parallel$i ||exit
|
||||
env > env.log
|
||||
python train.py --device_id=$i --dataset_name=$DATASET_NAME --data_path=$DATA_PATH > log 2>&1 &
|
||||
python ../../train.py --config_path=$CONFIG_FILE --device_id=$i --dataset_name=$DATASET_NAME \
|
||||
--data_path=$DATA_PATH --ckpt_path=$CKPT_PATH > log 2>&1 &
|
||||
cd ..
|
||||
done
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# A simple tutorial follows; more parameters can be set.
|
||||
# echo "Usage: sh run_standalone_eval_ascend.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH] [DEVICE_ID]"
|
||||
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $1 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $1 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
# python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
|
||||
python ../eval.py --config_path=$CONFIG_FILE > eval_log 2>&1 &
|
|
@ -25,5 +25,22 @@ export DATA_PATH=$2
|
|||
export CKPT_PATH=$3
|
||||
export DEVICE_ID=$4
|
||||
|
||||
python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
|
||||
--device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $1 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $1 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
python ../eval.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME \
|
||||
--data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
|
||||
--device_id=$DEVICE_ID --device_target="Ascend" > eval_log 2>&1 &
|
||||
|
|
@ -25,5 +25,21 @@ export DATA_PATH=$2
|
|||
export CKPT_PATH=$3
|
||||
export DEVICE_ID=$4
|
||||
|
||||
python eval.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
|
||||
--device_id=$DEVICE_ID --device_target="GPU" > eval_log 2>&1 &
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $1 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $1 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
python ../eval.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME \
|
||||
--data_path=$DATA_PATH --ckpt_path=$CKPT_PATH \
|
||||
--device_id=$DEVICE_ID --device_target="GPU" > eval_log 2>&1 &
|
||||
|
|
|
@ -14,15 +14,32 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# A simple tutorial follows; more parameters can be set.
|
||||
if [ $# != 3 ]
|
||||
if [ $# != 4 ]
|
||||
then
|
||||
echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
|
||||
echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID] [CKPT_PATH]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export DATASET_NAME=$1
|
||||
export DATA_PATH=$2
|
||||
export DEVICE_ID=$3
|
||||
export CKPT_PATH=$4
|
||||
|
||||
python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
|
||||
--device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $1 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $1 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
python ../train.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
|
||||
--ckpt_path=$CKPT_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
|
||||
|
|
@ -14,14 +14,30 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# A simple tutorial follows; more parameters can be set.
|
||||
if [ $# != 2 ]
|
||||
if [ $# != 3 ]
|
||||
then
|
||||
echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH]"
|
||||
echo "Usage: sh run_standalone_train_gpu.sh [cifar10|imagenet] [DATA_PATH] [CKPT_PATH]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export DATASET_NAME=$1
|
||||
export DATA_PATH=$2
|
||||
export CKPT_PATH=$3
|
||||
|
||||
python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
|
||||
--device_target="GPU" > log 2>&1 &
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $1 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $1 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
python ../train.py --config_path=$CONFIG_FILE --dataset_name=$DATASET_NAME --data_path=$DATA_PATH \
|
||||
--ckpt_path=$CKPT_PATH --device_target="GPU" > log 2>&1 &
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# A simple tutorial follows; more parameters can be set.
|
||||
# echo "Usage: sh run_standalone_train_ascend.sh [cifar10|imagenet] [DATA_PATH] [DEVICE_ID]"
|
||||
|
||||
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
|
||||
|
||||
if [ $# -ge 1 ]; then
|
||||
if [ $1 == 'imagenet' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../config_imagenet.yaml"
|
||||
elif [ $1 == 'cifar10' ]; then
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
else
|
||||
echo "Unrecognized parameter"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
|
||||
fi
|
||||
|
||||
# python train.py --dataset_name=$DATASET_NAME --data_path=$DATA_PATH --device_id=$DEVICE_ID --device_target="Ascend" > log 2>&1 &
|
||||
python ../train.py --config_path=$CONFIG_FILE > log 2>&1 &
|
|
@ -115,7 +115,7 @@ def get_config():
|
|||
"""
|
||||
parser = argparse.ArgumentParser(description="default name", add_help=False)
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../default_config.yaml"),
|
||||
parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, "../../default_config.yaml"),
|
||||
help="Config file path")
|
||||
path_args, _ = parser.parse_known_args()
|
||||
default, helper, choices = parse_yaml(path_args.config_path)
|
|
@ -19,17 +19,14 @@ python train.py --data_path /YourDataPath
|
|||
"""
|
||||
|
||||
import os
|
||||
# import sys
|
||||
# sys.path.append(os.path.join(os.getcwd(), 'utils'))
|
||||
from utils.config import config
|
||||
from utils.moxing_adapter import moxing_wrapper
|
||||
from utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
|
||||
|
||||
# from src.config import alexnet_cifar10_config, alexnet_imagenet_config
|
||||
from src.model_utils.config import config
|
||||
from src.model_utils.moxing_adapter import moxing_wrapper
|
||||
from src.model_utils.device_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
|
||||
from src.dataset import create_dataset_cifar10, create_dataset_imagenet
|
||||
from src.generator_lr import get_lr_cifar10, get_lr_imagenet
|
||||
from src.alexnet import AlexNet
|
||||
from src.get_param_groups import get_param_groups
|
||||
|
||||
import mindspore.nn as nn
|
||||
from mindspore.communication.management import init, get_rank
|
||||
from mindspore import dataset as de
|
||||
|
@ -44,14 +41,9 @@ from mindspore.common import set_seed
|
|||
set_seed(1)
|
||||
de.config.set_seed(1)
|
||||
|
||||
if os.path.exists(config.data_path_local):
|
||||
config.data_path = config.data_path_local
|
||||
config.checkpoint_path = os.path.join(config.checkpoint_path, str(get_rank_id()))
|
||||
else:
|
||||
config.checkpoint_path = os.path.join(config.output_path, config.checkpoint_path, str(get_rank_id()))
|
||||
|
||||
def modelarts_pre_process():
|
||||
pass
|
||||
# config.ckpt_path = os.path.join(config.output_path, str(get_rank_id()), config.checkpoint_path)
|
||||
|
||||
@moxing_wrapper(pre_process=modelarts_pre_process)
|
||||
def train_alexnet():
|
||||
|
@ -135,9 +127,9 @@ def train_alexnet():
|
|||
raise ValueError("Unsupported platform.")
|
||||
|
||||
if device_num > 1:
|
||||
ckpt_save_dir = os.path.join(config.checkpoint_path + "_" + str(get_rank()))
|
||||
ckpt_save_dir = os.path.join(config.ckpt_path + "_" + str(get_rank()))
|
||||
else:
|
||||
ckpt_save_dir = config.checkpoint_path
|
||||
ckpt_save_dir = config.ckpt_path
|
||||
|
||||
time_cb = TimeMonitor(data_size=step_per_epoch)
|
||||
config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
|
||||
|
|
Loading…
Reference in New Issue