forked from mindspore-Ecosystem/mindspore
!33282 [MSLite][OnDeviceTraining] Delete train_lenet case with old APIs
Merge pull request !33282 from lz/new_api
This commit is contained in: commit aec1dc77fb
@@ -1,4 +0,0 @@
*.mindir
*.ms
msl
package-*
@@ -1,49 +0,0 @@
BASE_DIR=$(realpath ../../../../)
APP:=bin/net_runner
LMSLIB:=-lmindspore-lite-train -lmindspore-lite
LMDLIB:=-lminddata-lite
MSDIR:=$(realpath package-$(TARGET)/lib)
ifneq ("$(wildcard $(MSDIR)/libhiai.so)","")
LHIAILIB:=-lhiai_ir_build -lhiai_ir -lhiai
else
LHIAILIB:=
endif

SRC:=src/net_runner.cc
OBJ:=$(SRC:.cc=.o)

CFLAGS := -Ofast -std=c++17 \
	-I . \
	-I ./msl/runtime \
	-I ./msl/runtime/include \
	-I ./msl/runtime/minddata \
	-I ./msl/tools/third_party/flatbuffers/include

ifeq ($(TARGET),arm64)
CXX := ${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/bin/clang++
CFLAGS += --target=aarch64-none-linux-android21 --gcc-toolchain=${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64 --sysroot=${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/sysroot -fdata-sections -ffunction-sections
LDFLAGS := --target=aarch64-none-linux-android21 --gcc-toolchain=${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64 --sysroot=${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/sysroot -Wl,--gc-sections
LDFLAGS += -L$(MSDIR) $(LMSLIB) $(LMDLIB) -pthread -llog -latomic -lm $(LHIAILIB) -Wl,-rpath,$(MSDIR)
else
CFLAGS += -g
LDFLAGS := -L$(MSDIR) $(LMSLIB) $(LMDLIB) -lpthread -Wl,-rpath,$(MSDIR)
endif
LD := ${CXX}

all:$(APP)

$(APP): $(OBJ)
	@mkdir -p bin
	$(LD) $(OBJ) $(LDFLAGS) -o $@

clean:
	rm -rf src/*.o bin/

mrproper:
	rm -rf package* msl src/*.o bin/ model/*.mindir model/*.ms model/*.so* model/converter_lite

%.o:%.cc
	$(CXX) $(CFLAGS) -c $< -o $@
@@ -1,145 +0,0 @@
# Content

<!-- TOC -->

- [Overview](#overview)
- [Model Architecture](#model-architecture)
- [Dataset](#dataset)
- [Environment Requirements](#environment-requirements)
- [Quick Start](#quick-start)
- [Script Detailed Description](#script-detailed-description)

<!-- /TOC -->

# Overview

This folder holds code for Training-on-Device of a LeNet model. Part of the code runs on a server using MindSpore infrastructure, another part uses the MindSpore Lite conversion utility, and the last part is the actual training of the model on an Android-based device.

# Model Architecture

LeNet is a very simple network, composed of only 5 layers: 2 convolutional layers and 3 fully connected layers. Such a small network can be fully trained (from scratch) on a device in a short time, which makes it a good example.

# Dataset

In this example we use the MNIST dataset of handwritten digits as published in [THE MNIST DATABASE](http://yann.lecun.com/exdb/mnist/)

- Dataset size: 52.4M, 60,000 28*28 images in 10 classes
    - Test: 10,000 images
    - Train: 60,000 images
- Data format: binary files
    - Note: data is processed in dataset.cc

- The dataset directory structure is as follows:

```text
mnist/
├── test
│   ├── t10k-images-idx3-ubyte
│   └── t10k-labels-idx1-ubyte
└── train
    ├── train-images-idx3-ubyte
    └── train-labels-idx1-ubyte
```

# Environment Requirements

- Server side
    - [MindSpore Framework](https://www.mindspore.cn/install/en): it is recommended to install a docker image
    - MindSpore ToD Framework
        - [Downloads](https://www.mindspore.cn/lite/docs/en/master/use/downloads.html)
        - [Build](https://www.mindspore.cn/lite/docs/en/master/use/build.html)
    - [Android NDK r20b](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip)
    - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools)
- A connected Android device

# Quick Start

After installing all of the above, the script in the home directory can be run with the following arguments:

```bash
sh ./prepare_and_run.sh -D DATASET_PATH [-d MINDSPORE_DOCKER] [-r RELEASE.tar.gz] [-t arm64|x86]
```

where:

- DATASET_PATH is the path to the [dataset](#dataset),
- MINDSPORE_DOCKER is the image name of the docker that runs [MindSpore](#environment-requirements). If not provided, MindSpore will be run locally,
- RELEASE.tar.gz is a pointer to the MindSpore ToD release tar ball. If not provided, the script will attempt to find the MindSpore ToD compilation output,
- the target defaults to arm64, i.e., on-device. If x86 is provided, the demo will be run locally. Note that the infrastructure is not optimized for running on x86. Also note that the user needs to call "make clean" when switching between targets. An example invocation is shown below.
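For example, assuming the MNIST data sits under `/path/to/mnist` and a release tarball has been built into the repository's `output/` directory, a typical on-device run could look like this (the dataset path below is only a placeholder, not part of the original manual):

```bash
# Hypothetical invocation: the dataset path is a placeholder; the tarball pattern matches the build output.
sh ./prepare_and_run.sh -D /path/to/mnist -r ../../../../output/mindspore-lite-*-android-aarch64.tar.gz -t arm64
```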
# Script Detailed Description

The provided `prepare_and_run.sh` script performs the following:

- Prepares the trainable LeNet model in a `.ms` format
- Prepares the folder that should be pushed into the device
- Copies this folder into the device and runs the scripts on the device

See how to run the script and the parameter definitions in the [Quick Start Section](#quick-start)

## Preparing the model

Within the model folder, a `prepare_model.sh` script uses MindSpore infrastructure to export the model into a `.mindir` file. The user can specify a docker image on which MindSpore is installed. Otherwise, the python script will be run locally.
The script then converts the `.mindir` to a `.ms` format using the MindSpore ToD converter.
The script accepts a tar ball where the converter resides. Otherwise, the script will attempt to find the converter in the MindSpore ToD build output directory.
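For reference, the two conversion steps driven by `prepare_model.sh` boil down to roughly the following commands (a simplified sketch: the docker wrapping and converter lookup logic are omitted, and the batch size `32` is the default used by `prepare_and_run.sh`):

```bash
# Export the LeNet training graph to a .mindir file (run inside the MindSpore docker or locally).
PYTHONPATH=../../../../../tests/perf_test python lenet_export.py 32

# Convert the .mindir file into a trainable .ms file with the MindSpore ToD converter.
LD_LIBRARY_PATH=./:${LD_LIBRARY_PATH} ./converter_lite --fmk=MINDIR --trainModel=true --modelFile=lenet_tod.mindir --outputFile=lenet_tod
```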
## Preparing the Folder

The `lenet_tod.ms` model file is then copied into the `package` folder together with the scripts, the MindSpore ToD library and the MNIST dataset.
Finally, the code (in src) is compiled for arm64 and the binary is copied into the `package` folder.

### Running the code on the device

To run the code on the device, the script first uses the `adb` tool to push the `package` folder onto the device. It then runs training (which takes some time) and finally runs evaluation of the trained model using the test data.
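In essence, the on-device part of `prepare_and_run.sh` reduces to the following `adb` sequence (a sketch for the default arm64 target without a specific device id; `-e 5` and `-b -1` are the script defaults):

```bash
# Push the prepared folder to the device.
adb push package-arm64 /data/local/tmp/

# Train for the default number of epochs, then evaluate the trained model on the test set.
adb shell "cd /data/local/tmp/package-arm64 && /system/bin/sh train.sh -e 5 -b -1"
adb shell "cd /data/local/tmp/package-arm64 && /system/bin/sh eval.sh"
```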
# Folder Directory tree

```text
train_lenet/
├── Makefile               # Makefile of src code
├── model
│   ├── lenet_export.py    # Python script that exports the LeNet model to .mindir
│   ├── prepare_model.sh   # script that exports the model (using docker) and then converts it
│   └── train_utils.py     # utility function used during the export
├── prepare_and_run.sh     # main script that creates the model, compiles it and sends it to the device for running
├── README.md              # English manual
├── README_CN.md           # Chinese manual
├── scripts
│   ├── eval.sh            # on-device script that loads the trained model and evaluates its accuracy
│   └── train.sh           # on-device script that loads the initial model and trains it
├── src
│   ├── net_runner.cc      # program that runs training/evaluation of models
│   ├── net_runner.h       # net_runner header
│   └── utils.h            # general utilities
```

When the `prepare_and_run.sh` script is run, the following folder is prepared. It is pushed to the device and then training runs.

```text
├── package
│   ├── bin
│   │   └── net_runner                   # the executable that performs the training/evaluation
│   ├── dataset
│   │   ├── test
│   │   │   ├── t10k-images-idx3-ubyte   # test images
│   │   │   └── t10k-labels-idx1-ubyte   # test labels
│   │   └── train
│   │       ├── train-images-idx3-ubyte  # train images
│   │       └── train-labels-idx1-ubyte  # train labels
│   ├── eval.sh                          # on-device script that loads the trained model and evaluates its accuracy
│   ├── lib
│   │   ├── libjpeg.so.62
│   │   ├── libminddata-lite.a
│   │   ├── libminddata-lite.so
│   │   ├── libmindspore-lite.a
│   │   ├── libmindspore-lite-jni.so
│   │   ├── libmindspore-lite.so
│   │   ├── libmindspore-lite-train.a
│   │   ├── libmindspore-lite-train-jni.so
│   │   ├── libmindspore-lite-train.so
│   │   ├── libturbojpeg.so.0
│   │   └── mindspore-lite-java.jar
│   ├── model
│   │   └── lenet_tod.ms                 # model to train
│   └── train.sh                         # on-device script that loads the initial model and trains it
```
@@ -1,134 +0,0 @@
# Contents

<!-- TOC -->

- [Contents](#contents)
- [Overview](#overview)
- [Dataset](#dataset)
- [Environment Requirements](#environment-requirements)
- [Quick Start](#quick-start)
- [Script Detailed Description](#script-detailed-description)
    - [Model Preparation](#model-preparation)
    - [Model Training](#model-training)
- [Directory Structure](#directory-structure)

<!-- /TOC -->

# Overview

This document explains how to train a LeNet model on a device. The model is first converted on a server or a personal laptop, and then trained on an Android device. LeNet consists of 2 convolutional layers and 3 fully connected layers; the model structure is simple, so it can be trained quickly on the device.

# Dataset

This example uses the [MNIST handwritten digits dataset](http://yann.lecun.com/exdb/mnist/)

- Dataset size: 52.4M, 60,000 28*28 images in 10 classes
    - Test set: 10,000 images
    - Training set: 60,000 images

- Data format: binary files
    - Note: data is processed in dataset.cc

- The dataset directory structure is as follows:

```text
mnist/
├── test
│   ├── t10k-images-idx3-ubyte
│   └── t10k-labels-idx1-ubyte
└── train
    ├── train-images-idx3-ubyte
    └── train-labels-idx1-ubyte
```

# Environment Requirements

- Server or personal laptop
    - [MindSpore Framework](https://www.mindspore.cn/install): installation via Docker is recommended
    - [MindSpore ToD Download](https://www.mindspore.cn/lite/docs/zh-CN/master/use/downloads.html)
    - [MindSpore ToD Build](https://www.mindspore.cn/lite/docs/zh-CN/master/use/build.html)
    - [Android NDK r20b](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip)
    - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools)
- An Android mobile device

# Quick Start

After installation, run the script in the `./mindspore/mindspore/lite/examples/train_lenet` directory as follows:

```bash
sh ./prepare_and_run.sh -D DATASET_PATH [-d MINDSPORE_DOCKER] [-r RELEASE.tar.gz] [-t arm64|x86]
```

where `DATASET_PATH` is the dataset path; `MINDSPORE_DOCKER` is the docker image that runs MindSpore (if no docker environment is available, the scripts run locally); `RELEASE.tar.gz` is the absolute path of the on-device training runtime package; and the `-t` option selects the target processor architecture, defaulting to `arm64` (if `x86` is given, the demo runs locally). Note: when switching between targets, run `make clean` before executing the script again.

# Script Detailed Description

The `prepare_and_run.sh` script does the following:

- Converts the Python model file into a `.ms` file.
- Compiles the training source code and transfers the relevant files to the device.
- Runs training on the device.

See [Quick Start](#quick-start) for the run command.

## Model Preparation

The `prepare_model.sh` script uses the MindSpore framework to convert the Python model into a `lenet_tod.mindir` model; it then uses the MindSpore ToD model converter to convert the `lenet_tod.mindir` file into a `lenet_tod.ms` file. If there is no docker environment, the conversion is performed locally.

## Model Training

The `lenet_tod.ms` model file, training scripts, MindSpore ToD library files and the `MNIST` dataset are copied into the `package` folder. The code in the `/src` folder is compiled for the arm64 architecture, and the resulting binary is copied into the `package` folder. Finally, the `adb` tool transfers the `package` folder to the device and runs training.

# Directory Structure

```text
train_lenet/
├── Makefile               # Makefile of src code
├── model
│   ├── lenet_export.py    # Python script that exports the LeNet model to .mindir
│   ├── prepare_model.sh   # script that exports the model (using docker) and then converts it
│   └── train_utils.py     # utility function used during the export
├── prepare_and_run.sh     # main script that creates the model, compiles it and sends it to the device for running
├── README.md              # this manual
├── scripts
│   ├── eval.sh            # on-device script that loads the trained model and evaluates its accuracy
│   ├── run_eval.sh        # adb script that launches eval.sh
│   ├── run_train.sh       # adb script that launches train.sh
│   └── train.sh           # on-device script that loads the initial model and trains it
├── src
│   ├── dataset.cc         # dataset handler
│   ├── dataset.h          # dataset class header
│   ├── net_runner.cc      # program that runs training/evaluation of models
│   └── net_runner.h       # net_runner header
```

Before the `prepare_and_run.sh` script runs, make sure the following directory structure is correct; these files will be transferred to the device for training.

```text
├── package
│   ├── bin
│   │   └── net_runner                   # the executable that performs the training/evaluation
│   ├── dataset
│   │   ├── test
│   │   │   ├── t10k-images-idx3-ubyte   # test images
│   │   │   └── t10k-labels-idx1-ubyte   # test labels
│   │   └── train
│   │       ├── train-images-idx3-ubyte  # train images
│   │       └── train-labels-idx1-ubyte  # train labels
│   ├── eval.sh                          # on-device script that loads the trained model and evaluates its accuracy
│   ├── lib
│   │   ├── libjpeg.so.62
│   │   ├── libminddata-lite.a
│   │   ├── libminddata-lite.so
│   │   ├── libmindspore-lite.a
│   │   ├── libmindspore-lite-jni.so
│   │   ├── libmindspore-lite.so
│   │   ├── libmindspore-lite-train.a
│   │   ├── libmindspore-lite-train-jni.so
│   │   ├── libmindspore-lite-train.so
│   │   ├── libturbojpeg.so.0
│   │   └── mindspore-lite-java.jar
│   ├── model
│   │   └── lenet_tod.ms                 # model to train
│   └── train.sh                         # on-device script that loads the initial model and trains it
```
@@ -1,39 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""lenet_export."""

import sys
import numpy as np
from mindspore import context, Tensor, FixedLossScaleManager
import mindspore.common.dtype as mstype
from mindspore.train.serialization import export
from lenet import LeNet5
from train_utils import train_wrap


n = LeNet5()
n.set_train()
context.set_context(mode=context.GRAPH_MODE, device_target="CPU", save_graphs=False)

BATCH_SIZE = int(sys.argv[1])
x = Tensor(np.ones((BATCH_SIZE, 1, 32, 32)), mstype.float32)
label = Tensor(np.zeros([BATCH_SIZE]).astype(np.int32))
net = train_wrap(n)
export(net, x, label, file_name="lenet_tod", file_format='MINDIR')
loss_scale = 128.0
loss_scale_manager = FixedLossScaleManager(loss_scale, False)
mix_precision_net = train_wrap(n, None, None, None, loss_scale_manager)
export(mix_precision_net, x, label, file_name="mix_lenet_tod", file_format='MINDIR')
print("finished exporting")
@@ -1,51 +0,0 @@
#!/bin/bash

if [[ -z ${EXPORT} ]]; then
  echo "============Exporting=========="
  rm -f lenet_tod.mindir
  if [ -n "$2" ]; then
    DOCKER_IMG=$2
    docker run -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER --privileged=true ${DOCKER_IMG} /bin/bash -c "PYTHONPATH=../../../../../tests/perf_test python lenet_export.py '$1'; chmod 444 lenet_tod.mindir; rm -rf __pycache__"
  else
    echo "MindSpore docker was not provided, attempting to run locally"
    PYTHONPATH=../../../../../tests/perf_test python lenet_export.py $1
  fi
fi

CONVERTER="../../../build/tools/converter/converter_lite"
$CONVERTER &> /dev/null
if [ "$?" -ne 0 ]; then
  if ! command -v converter_lite &> /dev/null
  then
    tar -xzf ../../../../../output/mindspore-lite-*-linux-x64.tar.gz --strip-components 4 --wildcards --no-anchored 'converter_lite' '*so.*' '*.so'
    if [ -f ./converter_lite ]; then
      CONVERTER=./converter_lite
    else
      echo "converter_lite could not be found in MindSpore build directory nor in system path"
      exit 1
    fi
  else
    CONVERTER=converter_lite
  fi
fi

function GenerateWeightQuantConfig() {
  echo "[common_quant_param]" > $4
  echo "quant_type=WEIGHT_QUANT" >> $4
  echo "bit_num=$1" >> $4
  echo "min_quant_weight_size=$2" >> $4
  echo "min_quant_weight_channel=$3" >> $4
}

echo "============Converting========="
QUANT_OPTIONS=""
if [[ ! -z ${QUANTIZE} ]]; then
  echo "Quantizing weights"
  WEIGHT_QUANT_CONFIG=ci_lenet_tod_weight_quant.cfg
  GenerateWeightQuantConfig 8 100 15 ${WEIGHT_QUANT_CONFIG}
  QUANT_OPTIONS="--configFile=${WEIGHT_QUANT_CONFIG}"
fi
LD_LIBRARY_PATH=./:${LD_LIBRARY_PATH} $CONVERTER --fmk=MINDIR --trainModel=true --modelFile=lenet_tod.mindir --outputFile=lenet_tod $QUANT_OPTIONS
if [[ ! -z ${MIX_FLAG} ]]; then
  LD_LIBRARY_PATH=./:${LD_LIBRARY_PATH} $CONVERTER --fmk=MINDIR --trainModel=true --modelFile=mix_lenet_tod.mindir --outputFile=mix_lenet_tod
fi
@@ -1,39 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train_utils."""

import mindspore.nn as nn
from mindspore.common.parameter import ParameterTuple
from mindspore import amp


def train_wrap(net, loss_fn=None, optimizer=None, weights=None, loss_scale_manager=None):
    """
    train_wrap
    """
    if loss_fn is None:
        loss_fn = nn.SoftmaxCrossEntropyWithLogits(reduction='mean', sparse=True)
    loss_net = nn.WithLossCell(net, loss_fn)
    loss_net.set_train()
    if weights is None:
        weights = ParameterTuple(net.trainable_params())
    if optimizer is None:
        optimizer = nn.Adam(weights, learning_rate=0.003, beta1=0.9, beta2=0.999, eps=1e-5, use_locking=False,
                            use_nesterov=False, weight_decay=4e-5, loss_scale=1.0)
    if loss_scale_manager is None:
        train_net = nn.TrainOneStepCell(loss_net, optimizer)
    else:
        train_net = amp.build_train_network(net, optimizer, loss_fn, level="O2", loss_scale_manager=loss_scale_manager)
    return train_net
@@ -1,218 +0,0 @@
#!/bin/bash

display_usage()
{
  echo -e "\nUsage: prepare_and_run.sh -D dataset_path [-d mindspore_docker] [-r release.tar.gz] [-t arm64|x86] [-q] [-o] [-M] [-b virtual_batch] [-m mindir] [-e epochs_to_train] [-i device_id]\n"
}

checkopts()
{
  TARGET="arm64"
  DOCKER=""
  MINDIR_FILE=""
  MNIST_DATA_PATH=""
  QUANTIZE=""
  FP16_FLAG=""
  VIRTUAL_BATCH=-1
  EPOCHS="-e 5"
  MIX_FLAG=""
  DEVICE_ID=""
  while getopts 'D:b:d:e:i:m:oqr:t:M:' opt
  do
    case "${opt}" in
      b)
        VIRTUAL_BATCH=$OPTARG
        ;;
      D)
        MNIST_DATA_PATH=$OPTARG
        ;;
      d)
        DOCKER=$OPTARG
        ;;
      e)
        EPOCHS="-e $OPTARG"
        ;;
      m)
        MINDIR_FILE=$OPTARG
        ;;
      o)
        FP16_FLAG="-o"
        ;;
      q)
        QUANTIZE="QUANTIZE"
        ;;
      r)
        TARBALL=$OPTARG
        ;;
      M)
        MIX_FLAG="-m"
        FP16_FLAG="-o"
        echo $OPTARG
        ;;
      t)
        if [ "$OPTARG" == "arm64" ] || [ "$OPTARG" == "x86" ]; then
          TARGET=$OPTARG
        else
          echo "No such target " $OPTARG
          display_usage
          exit 1
        fi
        ;;
      i)
        DEVICE_ID=$OPTARG
        ;;
      *)
        echo "Unknown option ${opt}!"
        display_usage
        exit 1
    esac
  done
}

START=$(date +%s.%N)
checkopts "$@"
if [ "$MNIST_DATA_PATH" == "" ]; then
  echo "MNIST Dataset directory path was not provided"
  display_usage
  exit 1
fi

if [ "$TARBALL" == "" ]; then
  if [ "${TARGET}" == "arm64" ]; then
    file=$(ls ../../../../output/mindspore-lite-*-android-aarch64.tar.gz)
  else
    file=$(ls ../../../../output/mindspore-lite-*-linux-x64.tar.gz)
  fi
  if [[ ${file} != "" ]] && [[ -f ${file} ]]; then
    TARBALL=${file}
  else
    echo "release.tar.gz was not found"
    display_usage
    exit 1
  fi
fi

# Prepare the model
if [[ "${VIRTUAL_BATCH}" == "-1" ]]; then
  BATCH=32
else
  BATCH=1
fi

EXPORT=""
if [ "$MINDIR_FILE" != "" ]; then
  cp -f $MINDIR_FILE model/lenet_tod.mindir
  EXPORT="DONT_EXPORT"
fi

cd model/ || exit 1
rm -f *.ms
EXPORT=${EXPORT} QUANTIZE=${QUANTIZE} MIX_FLAG=${MIX_FLAG} ./prepare_model.sh $BATCH $DOCKER || exit 1
cd ../

# Copy the .ms model to the package folder
PACKAGE=package-${TARGET}

rm -rf ${PACKAGE}
mkdir -p ${PACKAGE}/model
cp model/*.ms ${PACKAGE}/model || exit 1

# Copy the running script to the package
cp scripts/*.sh ${PACKAGE}/

# Copy the shared MindSpore ToD library
tar -xzf ${TARBALL}
mv mindspore-*/runtime/lib ${PACKAGE}/
mv mindspore-*/runtime/third_party/libjpeg-turbo/lib/* ${PACKAGE}/lib/
cd mindspore-*
if [[ "${TARGET}" == "arm64" ]] && [[ -d "runtime/third_party/hiai_ddk/lib" ]]; then
  mv runtime/third_party/hiai_ddk/lib/* ../${PACKAGE}/lib/
fi

cd ../
rm -rf msl
mv mindspore-* msl/
rm -rf msl/tools/
rm ${PACKAGE}/lib/*.a

# Copy the dataset to the package
cp -r $MNIST_DATA_PATH ${PACKAGE}/dataset || exit 1

echo "==========Compiling============"
make clean
make TARGET=${TARGET}

# Copy the executable to the package
mv bin ${PACKAGE}/ || exit 1

if [ "${TARGET}" == "arm64" ]; then
  cp ${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android/libc++_shared.so ${PACKAGE}/lib/ || exit 1
  if [ "${DEVICE_ID}" == "" ]; then
    echo "=======Pushing to device======="
    adb push ${PACKAGE} /data/local/tmp/
    if [ "${MIX_FLAG}" == "" ]; then
      # origin model is fp32 model
      echo "========Training on Device origin model is fp32====="
      adb shell "cd /data/local/tmp/package-arm64 && /system/bin/sh train.sh ${EPOCHS} ${FP16_FLAG} -b ${VIRTUAL_BATCH}"

      echo
      echo "===Evaluating trained Model origin model is fp32====="
      adb shell "cd /data/local/tmp/package-arm64 && /system/bin/sh eval.sh ${FP16_FLAG}"
      echo
    else
      echo "========Training on Device origin model is fp16 ====="
      adb shell "cd /data/local/tmp/package-arm64 && /system/bin/sh train.sh ${EPOCHS} ${FP16_FLAG} -b ${VIRTUAL_BATCH} ${MIX_FLAG}"

      echo
      echo "===Evaluating trained Model origin model is fp16====="
      adb shell "cd /data/local/tmp/package-arm64 && /system/bin/sh eval.sh ${FP16_FLAG} ${MIX_FLAG}"
      echo
    fi
  else
    echo "=======Pushing to device======="
    adb -s ${DEVICE_ID} push ${PACKAGE} /data/local/tmp/
    if [ "${MIX_FLAG}" == "" ]; then
      # origin model is fp32 model
      echo "========Training on Device origin model is fp32====="
      adb -s ${DEVICE_ID} shell "cd /data/local/tmp/package-arm64 && /system/bin/sh train.sh ${EPOCHS} ${FP16_FLAG} -b ${VIRTUAL_BATCH}"

      echo
      echo "===Evaluating trained Model origin model is fp32====="
      adb -s ${DEVICE_ID} shell "cd /data/local/tmp/package-arm64 && /system/bin/sh eval.sh ${FP16_FLAG}"
      echo
    else
      echo "========Training on Device origin model is fp16 ====="
      adb -s ${DEVICE_ID} shell "cd /data/local/tmp/package-arm64 && /system/bin/sh train.sh ${EPOCHS} ${FP16_FLAG} -b ${VIRTUAL_BATCH} ${MIX_FLAG}"

      echo
      echo "===Evaluating trained Model origin model is fp16====="
      adb -s ${DEVICE_ID} shell "cd /data/local/tmp/package-arm64 && /system/bin/sh eval.sh ${FP16_FLAG} ${MIX_FLAG}"
      echo
    fi
  fi

else
  cd ${PACKAGE} || exit 1
  echo "======Training Locally========="
  ./train.sh ${EPOCHS}
  if [ "$?" != "0" ]; then
    echo "Training Model failed"
    exit 1
  fi

  echo "===Evaluating trained Model====="
  ./eval.sh
  if [ "$?" != "0" ]; then
    echo "Eval Model failed"
    exit 1
  fi
  cd ..
fi
END=$(date +%s.%N)
TIME=$(echo "$END-$START" | bc)
echo "total run train lenet C++ time: $TIME s"
@@ -1,24 +0,0 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# A simple tutorial follows; more parameters can be set.
is_mix_model=$(echo "$@" | grep "m")
if [[ "$is_mix_model" != "" ]]
then
  LD_LIBRARY_PATH=./lib/ bin/net_runner -f model/mix_lenet_tod_trained.ms -e 0 -d dataset $1
else
  LD_LIBRARY_PATH=./lib/ bin/net_runner -f model/lenet_tod_trained.ms -e 0 -d dataset $1
fi
@@ -1,24 +0,0 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# A simple tutorial follows; more parameters can be set.
is_mix_model=$(echo "$@" | grep "m")
if [[ "$is_mix_model" != "" ]]
then
  LD_LIBRARY_PATH=./lib/ bin/net_runner -f model/mix_lenet_tod.ms -d dataset "$@"
else
  LD_LIBRARY_PATH=./lib/ bin/net_runner -f model/lenet_tod.ms -d dataset "$@"
fi
@@ -1,301 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/net_runner.h"
#include <getopt.h>
#include <malloc.h>
#include <math.h>
#include <stdio.h>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iostream>
#include <utility>
#include "include/context.h"
#include "include/dataset/datasets.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_lite.h"
#include "include/train/accuracy_metrics.h"
#include "include/train/ckpt_saver.h"
#include "include/train/classification_train_accuracy_monitor.h"
#include "include/train/loss_monitor.h"
#include "include/train/lr_scheduler.h"
#include "include/train/train_cfg.h"
#include "include/train/train_session.h"
#include "src/utils.h"

using mindspore::dataset::Dataset;
using mindspore::dataset::Mnist;
using mindspore::dataset::SequentialSampler;
using mindspore::dataset::TensorOperation;
using mindspore::dataset::transforms::TypeCast;
using mindspore::dataset::vision::Normalize;
using mindspore::dataset::vision::Resize;
using mindspore::lite::AccuracyMetrics;
using mindspore::lite::Model;
using mindspore::session::TrainLoopCallBack;
using mindspore::session::TrainLoopCallBackData;

constexpr int kPrintNum = 10;
constexpr float kScalePoint = 255.0f;
constexpr int kBatchSize = 2;
constexpr int kNCHWDims = 4;
constexpr int kNCHWCDim = 2;
constexpr int kPrintTimes = 100;
constexpr int kSaveSteps = 1000;
constexpr float kGammaFactor = 0.7f;
constexpr static int kElem2Print = 10;

class Rescaler : public mindspore::session::TrainLoopCallBack {
 public:
  explicit Rescaler(float scale) : scale_(scale) {
    if (scale_ == 0) {
      scale_ = 1.0;
    }
  }
  ~Rescaler() override = default;
  void StepBegin(const mindspore::session::TrainLoopCallBackData &cb_data) override {
    auto inputs = cb_data.session_->GetInputs();
    auto *input_data = reinterpret_cast<float *>(inputs.at(0)->MutableData());
    for (int k = 0; k < inputs.at(0)->ElementsNum(); k++) input_data[k] /= scale_;
  }

 private:
  float scale_ = 1.0;
};

class Measurement : public mindspore::session::TrainLoopCallBack {
 public:
  explicit Measurement(unsigned int epochs)
      : epochs_(epochs), time_avg_(std::chrono::duration<double, std::milli>(0)) {}
  ~Measurement() override = default;
  void EpochBegin(const mindspore::session::TrainLoopCallBackData &cb_data) override {
    start_time_ = std::chrono::high_resolution_clock::now();
  }
  int EpochEnd(const mindspore::session::TrainLoopCallBackData &cb_data) override {
    end_time_ = std::chrono::high_resolution_clock::now();
    auto time = std::chrono::duration<double, std::milli>(end_time_ - start_time_);
    time_avg_ += time;
    return mindspore::session::RET_CONTINUE;
  }
  void End(const mindspore::session::TrainLoopCallBackData &cb_data) override {
    if (epochs_ > 0) {
      std::cout << "AvgRunTime: " << time_avg_.count() / epochs_ << " ms" << std::endl;
    }

    struct mallinfo info = mallinfo();
    std::cout << "Total allocation: " << info.arena + info.hblkhd << std::endl;
  }

 private:
  std::chrono::time_point<std::chrono::high_resolution_clock> start_time_;
  std::chrono::time_point<std::chrono::high_resolution_clock> end_time_;
  std::chrono::duration<double, std::milli> time_avg_;
  unsigned int epochs_;
};

// Definition of verbose callback function after forwarding operator.
bool after_callback(const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
                    const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
                    const mindspore::CallBackParam &call_param) {
  printf("%s\n", call_param.node_name.c_str());
  for (size_t i = 0; i < after_inputs.size(); i++) {
    int num2p = (after_inputs.at(i)->ElementsNum());
    printf("in%zu(%d): ", i, num2p);
    if (num2p > kPrintNum) num2p = kPrintNum;
    if (after_inputs.at(i)->data_type() == mindspore::kNumberTypeInt32) {
      auto d = reinterpret_cast<int *>(after_inputs.at(i)->MutableData());
      for (int j = 0; j < num2p; j++) printf("%d, ", d[j]);
    } else {
      auto d = reinterpret_cast<float *>(after_inputs.at(i)->MutableData());
      for (int j = 0; j < num2p; j++) printf("%f, ", d[j]);
    }
    printf("\n");
  }
  for (size_t i = 0; i < after_outputs.size(); i++) {
    auto d = reinterpret_cast<float *>(after_outputs.at(i)->MutableData());
    int num2p = (after_outputs.at(i)->ElementsNum());
    printf("ou%zu(%d): ", i, num2p);
    if (num2p > kElem2Print) {
      num2p = kElem2Print;
    }
    for (int j = 0; j < num2p; j++) printf("%f, ", d[j]);
    printf("\n");
  }
  return true;
}

NetRunner::~NetRunner() {
  if (loop_ != nullptr) delete loop_;
  if (session_ != nullptr) delete session_;
}

void NetRunner::InitAndFigureInputs() {
  mindspore::lite::Context context;
  context.device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = mindspore::lite::NO_BIND;
  context.device_list_[0].device_info_.cpu_device_info_.enable_float16_ = enable_fp16_;
  context.device_list_[0].device_type_ = mindspore::lite::DT_CPU;
  context.thread_num_ = 2;

  mindspore::lite::TrainCfg train_cfg;
  session_ = mindspore::session::TrainSession::CreateTrainSession(ms_file_, &context, true, &train_cfg);
  MS_ASSERT(session_ != nullptr);

  session_->SetupVirtualBatch(virtual_batch_);
  loop_ = mindspore::session::TrainLoop::CreateTrainLoop(session_);

  if (verbose_) {
    loop_->SetKernelCallBack(nullptr, after_callback);
  }
  acc_metrics_ = std::shared_ptr<AccuracyMetrics>(new AccuracyMetrics);

  loop_->Init({acc_metrics_.get()});

  auto inputs = session_->GetInputs();
  MS_ASSERT(inputs.size() > 1);
  auto nhwc_input_dims = inputs.at(0)->shape();
  MS_ASSERT(nhwc_input_dims.size() == kNCHWDims);
  batch_size_ = nhwc_input_dims.at(0);
  h_ = nhwc_input_dims.at(1);
  w_ = nhwc_input_dims.at(kNCHWCDim);
}

float NetRunner::CalculateAccuracy(int max_tests) {
  test_ds_ = Mnist(data_dir_ + "/test", "all");
  TypeCast typecast_f(mindspore::DataType::kNumberTypeFloat32);
  Resize resize({h_, w_});
  test_ds_ = test_ds_->Map({&resize, &typecast_f}, {"image"});

  TypeCast typecast(mindspore::DataType::kNumberTypeInt32);
  test_ds_ = test_ds_->Map({&typecast}, {"label"});
  test_ds_ = test_ds_->Batch(batch_size_, true);

  Rescaler rescale(kScalePoint);

  loop_->Eval(test_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale}, nullptr, INT_MAX);
  std::cout << "Accuracy is " << acc_metrics_->Eval() << std::endl;

  return 0.0;
}

int NetRunner::InitDB() {
  train_ds_ = Mnist(data_dir_ + "/train", "all", std::make_shared<SequentialSampler>(0, 0));

  TypeCast typecast_f(mindspore::DataType::kNumberTypeFloat32);
  Resize resize({h_, w_});
  train_ds_ = train_ds_->Map({&resize, &typecast_f}, {"image"});

  TypeCast typecast(mindspore::DataType::kNumberTypeInt32);
  train_ds_ = train_ds_->Map({&typecast}, {"label"});

  train_ds_ = train_ds_->Batch(batch_size_, true);

  if (verbose_) {
    std::cout << "DatasetSize is " << train_ds_->GetDatasetSize() << std::endl;
  }
  if (train_ds_->GetDatasetSize() == 0) {
    std::cout << "No relevant data was found in " << data_dir_ << std::endl;
    MS_ASSERT(train_ds_->GetDatasetSize() != 0);
  }
  return 0;
}

int NetRunner::TrainLoop() {
  mindspore::lite::LossMonitor lm(kPrintTimes);
  mindspore::lite::ClassificationTrainAccuracyMonitor am(1);

  mindspore::lite::CkptSaver cs(kSaveSteps, std::string("lenet"));
  Rescaler rescale(kScalePoint);
  Measurement measure(epochs_);

  if (virtual_batch_ > 0) {
    loop_->Train(epochs_, train_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &measure},
                 nullptr);
  } else {
    struct mindspore::lite::StepLRLambda step_lr_lambda(1, kGammaFactor);
    mindspore::lite::LRScheduler step_lr_sched(mindspore::lite::StepLRLambda, static_cast<void *>(&step_lr_lambda), 1);
    loop_->Train(epochs_, train_ds_.get(),
                 std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &step_lr_sched, &measure}, nullptr);
  }

  return 0;
}

int NetRunner::Main() {
  InitAndFigureInputs();

  InitDB();

  TrainLoop();

  CalculateAccuracy();

  if (epochs_ > 0) {
    auto trained_fn = ms_file_.substr(0, ms_file_.find_last_of('.')) + "_trained.ms";
    session_->Export(trained_fn);
  }
  return 0;
}

void NetRunner::Usage() {
  std::cout << "Usage: net_runner -f <.ms model file> -d <data_dir> [-e <num of training epochs>] "
            << "[-v (verbose mode)] [-s <save checkpoint every X iterations>]" << std::endl;
}

bool NetRunner::ReadArgs(int argc, char *argv[]) {
  int opt;
  while ((opt = getopt(argc, argv, "f:e:d:s:ihc:vob:")) != -1) {
    switch (opt) {
      case 'f':
        ms_file_ = std::string(optarg);
        break;
      case 'e':
        epochs_ = atoi(optarg);
        break;
      case 'd':
        data_dir_ = std::string(optarg);
        break;
      case 'v':
        verbose_ = true;
        break;
      case 's':
        save_checkpoint_ = atoi(optarg);
        break;
      case 'o':
        enable_fp16_ = true;
        break;
      case 'b':
        virtual_batch_ = atoi(optarg);
        break;
      case 'h':
      default:
        Usage();
        return false;
    }
  }
  return true;
}

int main(int argc, char **argv) {
  NetRunner nr;

  if (nr.ReadArgs(argc, argv)) {
    nr.Main();
  } else {
    return -1;
  }
  return 0;
}
@@ -1,68 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_EXAMPLES_TRAIN_LENET_SRC_NET_RUNNER_H_
#define MINDSPORE_LITE_EXAMPLES_TRAIN_LENET_SRC_NET_RUNNER_H_

#include <tuple>
#include <iomanip>
#include <map>
#include <vector>
#include <memory>
#include <string>
#include "include/train/train_loop.h"
#include "include/train/accuracy_metrics.h"
#include "include/ms_tensor.h"
#include "include/dataset/datasets.h"

using mindspore::dataset::Dataset;
using mindspore::lite::AccuracyMetrics;

class NetRunner {
 public:
  int Main();
  bool ReadArgs(int argc, char *argv[]);
  ~NetRunner();

 private:
  void Usage();
  void InitAndFigureInputs();
  int InitDB();
  int TrainLoop();
  float CalculateAccuracy(int max_tests = 0);
  float GetLoss() const;
  mindspore::tensor::MSTensor *SearchOutputsForSize(size_t size) const;

  mindspore::session::LiteSession *session_ = nullptr;
  mindspore::session::TrainLoop *loop_ = nullptr;

  std::shared_ptr<Dataset> train_ds_;
  std::shared_ptr<Dataset> test_ds_;
  std::shared_ptr<AccuracyMetrics> acc_metrics_;

  std::string ms_file_ = "";
  std::string data_dir_ = "";
  unsigned int epochs_ = 10;
  bool verbose_ = false;
  bool enable_fp16_ = false;
  int virtual_batch_ = -1;
  int save_checkpoint_ = 0;
  int batch_size_ = 32;
  int h_ = 32;
  int w_ = 32;
};

#endif  // MINDSPORE_LITE_EXAMPLES_TRAIN_LENET_SRC_NET_RUNNER_H_
@@ -1,31 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_EXAMPLES_TRAIN_LENET_SRC_UTILS_H_
#define MINDSPORE_LITE_EXAMPLES_TRAIN_LENET_SRC_UTILS_H_

// DEBUG should be defined because the source code uses assert to test exceptions,
// but the Code Analysis Tool does not allow us to use assert directly.
#define DEBUG TRUE

#ifdef DEBUG
#include <cassert>
#define MS_ASSERT(f) assert(f)
#else
#define MS_ASSERT(f) ((void)0)
#endif

#endif  // MINDSPORE_LITE_EXAMPLES_TRAIN_LENET_SRC_UTILS_H_
@@ -464,30 +464,6 @@ function Run_CodeExamples() {
      rm -rf package*/dataset
      cd -
    fi

    should_run_example "train_lenet"
    should_run=$?
    if [[ "$should_run" == "1" ]]; then
      cd ${basepath}/../../examples/train_lenet || exit 1
      chmod 777 ./prepare_and_run.sh
      chmod 777 ./*/*.sh
      ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file}
      if [ "$?" != "0" ]; then
        echo "train_lenet prepare_and_run.sh failed"
        exit 1
      fi
      accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }')
      if [ $accurate -eq 1 ]; then
        echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file}
        echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file}
      else
        echo "Train Lenet demo failure" >> ${run_code_examples_log_file}
        echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file}
        fail=1
      fi
      rm -rf package*/dataset
      cd -
    fi
  fi
  return ${fail}
}