!17400 fix bug export and yaml file

From: @Maigee
Reviewed-by: @c_34,@oacjiewen
Signed-off-by: @c_34
This commit is contained in:
mindspore-ci-bot 2021-06-01 14:18:32 +08:00 committed by Gitee
commit 9ea606f82a
9 changed files with 155 additions and 34 deletions

View File

@ -127,6 +127,7 @@ After installing MindSpore via the official website, you can start training and
# (1) Choose either a (modify the yaml file parameters) or b (create a ModelArts training job and modify the parameters there).
# a. set "enable_modelarts=True".
# set "is_distributed=1"
# set "save_ckpt_path=/cache/train/outputs_imagenet/"
# set "train_data_dir=/cache/data/train/train_dataset/"
# set "train_pretrained=/cache/data/train/train_predtrained/pred file name" (without pre-trained weights, set train_pretrained="")

View File

@ -130,6 +130,7 @@ DenseNet-100使用的数据集 Cifar-10
# (1) 选择a(修改yaml文件参数)或者b(ModelArts创建训练作业修改参数)其中一种方式。
# a. 设置 "enable_modelarts=True" 。
# 设置 "is_distributed=1"
# 设置 "save_ckpt_path=/cache/train/outputs_imagenet/"
# 设置 "train_data_dir=/cache/data/train/train_dataset/"
# 设置 "train_pretrained=/cache/data/train/train_predtrained/pred file name" 如果没有预训练权重 train_pretrained=""

View File

@ -25,7 +25,7 @@ group_size: 1
# ==============================================================================
# Training options
train_data_dir: "/data1/mjq/dataset/cifar10/cifar-10-batches-bin/"
train_data_dir: ""
train_pretrained: ""
model_train_url: ""
image_size: "32, 32"
@ -47,18 +47,24 @@ label_smooth: 0
label_smooth_factor: 0.1
log_interval: 100
ckpt_interval: 3124
save_ckpt_path: "/data1/mjq/ckpt/outputs_cifar10/"
save_ckpt_path: "./"
is_save_on_master: 1
# Eval options
eval_data_dir: "/data1/mjq/dataset/cifar10/cifar-10-verify-bin"
eval_data_dir: ""
backbone: "resnet50"
ckpt_files: ""
log_path: ""
eval_url: ""
# export options
device_id: 0
batch_size: 32
file_name: "densenet"
file_format: "MINDIR"
---
# Help description for each configuration
enable_modelarts: "if enable_modelarts"
@ -87,3 +93,6 @@ ckpt_files: "fully path of pretrained model to load If it is a direction, it wil
rank: "local rank of distributed"
group size: "world size of distributed"
backbone: "backbone"
device_id: "Device id"
file_name: "output file name"
file_format: "file format choices [AIR MINDIR ONNX]"

View File

@ -24,7 +24,7 @@ group_size: 1
# ==============================================================================
# Training options
train_data_dir: "/data1/mjq/dataset/ImageNet_Original/train/"
train_data_dir: ""
train_pretrained: ""
model_train_url: ""
image_size: "224, 224"
@ -46,18 +46,24 @@ label_smooth: 0
label_smooth_factor: 0.1
log_interval: 100
ckpt_interval: 50000
save_ckpt_path: "/data1/mjq/ckpt/outputs_imagenet/"
save_ckpt_path: "./"
is_save_on_master: 1
# Eval options
eval_data_dir: "/data1/mjq/dataset/ImageNet_Original/validation_preprocess/"
eval_data_dir: ""
backbone: "resnet50"
ckpt_files: ""
log_path: ""
eval_url: ""
# export options
device_id: 0
batch_size: 32
file_name: "densenet"
file_format: "MINDIR"
---
# Help description for each configuration
enable_modelarts: "if enable_modelarts"
@ -86,4 +92,7 @@ ckpt_files: "fully path of pretrained model to load If it is a direction, it wil
rank: "local rank of distributed"
group size: "world size of distributed"
backbone: "backbone"
device_id: "Device id"
file_name: "output file name"
file_format: "file format choices [AIR MINDIR ONNX]"

View File

@ -12,41 +12,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""export checkpoint file into air, onnx, mindir models"""
import argparse
import numpy as np
"""export checkpoint file into air, onnx, mindir models
Suggested usage: python export.py --file_name [file_name] --ckpt_files [ckpt path] --file_format [file format]
"""
import numpy as np
from mindspore.common import dtype as mstype
from mindspore import context, Tensor
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
from src.model_utils.config import config
parser = argparse.ArgumentParser(description="densenet export")
parser.add_argument("--net", type=str, default='', help="Densenet Model, densenet100 or densenet121")
parser.add_argument("--device_id", type=int, default=0, help="Device id")
parser.add_argument("--batch_size", type=int, default=32, help="batch size")
parser.add_argument("--ckpt_file", type=str, required=True, help="Checkpoint file path.")
parser.add_argument("--file_name", type=str, default="densenet", help="output file name.")
parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"], default="AIR", help="file format")
parser.add_argument("--device_target", type=str, choices=["Ascend", "GPU", "CPU"], default="Ascend",
help="device target")
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
if args.device_target == "Ascend":
context.set_context(device_id=args.device_id)
if args.net == "densenet100":
from src.config import config_100 as config
if config.device_target == "Ascend":
context.set_context(device_id=config.device_id)
if config.net == "densenet100":
from src.network.densenet import DenseNet100 as DenseNet
else:
from src.config import config_121 as config
from src.network.densenet import DenseNet121 as DenseNet
if __name__ == "__main__":
network = DenseNet(config.num_classes)
param_dict = load_checkpoint(args.ckpt_file)
param_dict = load_checkpoint(config.ckpt_files)
param_dict_new = {}
for key, value in param_dict.items():
@ -62,7 +55,7 @@ if __name__ == "__main__":
network.add_flags_recursive(fp16=True)
network.set_train(False)
shape = [int(args.batch_size), 3] + [int(config.image_size.split(",")[0]), int(config.image_size.split(",")[1])]
shape = [int(config.batch_size), 3] + [int(config.image_size.split(",")[0]), int(config.image_size.split(",")[1])]
input_data = Tensor(np.zeros(shape), mstype.float32)
export(network, input_data, file_name=args.file_name, file_format=args.file_format)
export(network, input_data, file_name=config.file_name, file_format=config.file_format)

View File

@ -53,15 +53,15 @@ then
mpirun -n $1 --allow-run-as-root python3 ${BASEPATH}/../eval.py \
--net=$3 \
--dataset=$4 \
--train_data_dir=$5 \
--eval_data_dir=$5 \
--device_target='GPU' \
--train_pretrained=$6 > eval.log 2>&1 &
--ckpt_files=$6 > eval.log 2>&1 &
else
python3 ${BASEPATH}/../eval.py \
--net=$3 \
--dataset=$4 \
--train_data_dir=$5 \
--eval_data_dir=$5 \
--device_target='GPU' \
--train_pretrained=$6 > eval.log 2>&1 &
--ckpt_files=$6 > eval.log 2>&1 &
fi

View File

@ -47,6 +47,7 @@ then
mpirun -n $1 --allow-run-as-root python3 ${BASEPATH}/../train.py \
--net=$3 \
--dataset=$4 \
--is_distributed=1 \
--train_data_dir=$5 \
--device_target='GPU' \
--train_pretrained=$6 > train.log 2>&1 &
@ -65,6 +66,7 @@ else
mpirun -n $1 --allow-run-as-root python3 ${BASEPATH}/../train.py \
--net=$3 \
--dataset=$4 \
--is_distributed=1 \
--train_data_dir=$5 \
--device_target='GPU' > train.log 2>&1 &
else

View File

@ -40,7 +40,7 @@ cd ../eval || exit
python ${BASEPATH}/../eval.py \
--net=$1 \
--dataset=$2 \
--train_data_dir=$3 \
--eval_data_dir=$3 \
--device_target='CPU' \
--is_distributed=0 \
--train_pretrained=$4 > eval.log 2>&1 &
--ckpt_files=$4 > eval.log 2>&1 &

View File

@ -0,0 +1,106 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Four arguments are mandatory and a fifth (DEVICE_ID) is optional;
# any other count prints the usage text and aborts with status 1.
if (( $# < 4 || $# > 5 )); then
    echo "Usage: bash run_infer_310.sh [MINDIR_PATH] [DATASET] [DATA_PATH] [LABEL_FILE] [DEVICE_ID]
DEVICE_ID is optional, it can be set by environment variable device_id, otherwise the value is zero"
    exit 1
fi
# Print the absolute form of the path given as $1.
#
# A path that already starts with "/" is echoed unchanged. Anything else is
# resolved against the current working directory with `realpath -m`, which
# does not require the path to exist.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        # Quote the operand so a path containing whitespace stays a single
        # argument instead of being split into several realpath operands.
        realpath -m "$PWD/$1"
    fi
}
# Capture the positional arguments: model file, dataset name, data directory
# and label file. Paths are normalised to absolute form; quoting keeps
# arguments that contain whitespace intact.
model=$(get_real_path "$1")
dataset=$2
data_path=$(get_real_path "$3")
label_file=$(get_real_path "$4")

# DEVICE_ID is the optional fifth argument; default to device 0 without it.
# The previous check (`$# == 4`, reading $4) overwrote device_id with the
# label file path and silently ignored a fifth argument.
device_id=0
if [ $# == 5 ]; then
    device_id=$5
fi

# Echo the resolved settings so they show up in the job output.
echo "$model"
echo "$dataset"
echo "$data_path"
echo "$label_file"
echo "$device_id"
# Root of the Ascend installation; every path below is derived from it.
export ASCEND_HOME=/usr/local/Ascend/

if [[ -d "${ASCEND_HOME}/ascend-toolkit" ]]; then
    # Layout with an ascend-toolkit directory: components sit under ascend-toolkit/latest.
    export ASCEND_OPP_PATH="${ASCEND_HOME}/ascend-toolkit/latest/opp"
    export TBE_IMPL_PATH="${ASCEND_HOME}/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe"
    export PATH="${ASCEND_HOME}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:${ASCEND_HOME}/ascend-toolkit/latest/atc/bin:${PATH}"
    export LD_LIBRARY_PATH="/usr/local/lib:${ASCEND_HOME}/ascend-toolkit/latest/atc/lib64:${ASCEND_HOME}/ascend-toolkit/latest/fwkacllib/lib64:${ASCEND_HOME}/driver/lib64:${ASCEND_HOME}/add-ons:${LD_LIBRARY_PATH}"
    export PYTHONPATH="${TBE_IMPL_PATH}:${ASCEND_HOME}/ascend-toolkit/latest/fwkacllib/python/site-packages:${PYTHONPATH}"
else
    # Layout without ascend-toolkit: atc, acllib and opp sit directly under ASCEND_HOME.
    export ASCEND_OPP_PATH="${ASCEND_HOME}/opp"
    export PATH="${ASCEND_HOME}/atc/ccec_compiler/bin:${ASCEND_HOME}/atc/bin:${PATH}"
    export LD_LIBRARY_PATH="/usr/local/lib:${ASCEND_HOME}/atc/lib64:${ASCEND_HOME}/acllib/lib64:${ASCEND_HOME}/driver/lib64:${ASCEND_HOME}/add-ons:${LD_LIBRARY_PATH}"
    export PYTHONPATH="${ASCEND_HOME}/atc/python/site-packages:${PYTHONPATH}"
fi
# Build the inference application located in ../ascend310_infer.
#
# Runs `make clean` first when a Makefile is present, then build.sh with all
# of its output captured in build.log inside that directory. The whole script
# exits if the directory cannot be entered or the build fails.
function compile_app()
{
    cd ../ascend310_infer || exit

    if [[ -f "Makefile" ]]; then
        make clean
    fi

    if ! sh build.sh &> build.log; then
        echo "compile app code failed"
        exit 1
    fi

    # Return to the directory the function was called from.
    cd - || exit
}
# Run the compiled inference binary on the dataset.
#
# Recreates the result_Files and time_Result directories in the current
# directory, then launches ../ascend310_infer/out/main with its output
# captured in infer.log. Exits the script with status 1 if the binary fails.
# Reads the globals: model, dataset, data_path, device_id.
function infer()
{
    local out_dir

    # Remove any existing output directories, then create them afresh.
    for out_dir in result_Files time_Result; do
        if [ -d "$out_dir" ]; then
            rm -rf "./$out_dir"
        fi
        mkdir "$out_dir"
    done

    # Expansions are quoted so paths containing whitespace reach the binary
    # as single option values.
    if ! ../ascend310_infer/out/main --model_path="$model" --dataset="$dataset" --dataset_path="$data_path" --device_id="$device_id" &> infer.log; then
        echo "execute inference failed"
        exit 1
    fi
}
# Compute accuracy from the inference results.
#
# Calls ../postprocess.py on the result_Files directory with the dataset name
# and label file, capturing its output in acc.log. Exits the script with
# status 1 if the Python script fails.
# Reads the globals: dataset, label_file.
function cal_acc()
{
    # Expansions are quoted so a label path containing whitespace is passed
    # as one option value.
    if ! python ../postprocess.py --dataset="$dataset" --label_file="$label_file" --result_path=result_Files &> acc.log; then
        echo "calculate accuracy failed"
        exit 1
    fi
}
# Pipeline: build the inference app, run inference, then compute accuracy.
# Each step exits the script itself on failure, so later steps only run
# after the earlier ones succeeded.
compile_app
infer
cal_acc