yolov5 add 310 infer

This commit is contained in:
chenweitao_295 2021-06-07 19:45:39 +08:00
parent 430c03f3c9
commit c797f1453d
10 changed files with 959 additions and 33 deletions

View File

@ -14,12 +14,15 @@
- [Evaluation](#testing)
- [Evaluation Process](#evaluation-process)
- [Evaluation](#evaluation)
- [Convert Process](#convert-process)
- [Convert](#convert)
- [Inference Process](#inference-process)
- [Export MindIR](#export-mindir)
- [Infer on Ascend310](#infer-on-ascend310)
- [result](#result)
- [Model Description](#model-description)
- [Performance](#performance)
- [Evaluation Performance](#evaluation-performance)
- [Inference Performance](#inference-performance)
- [310 Inference Performance](#310-inference-performance)
- [ModelZoo Homepage](#modelzoo-homepage)
# [YOLOv5 Description](#contents)
@ -125,9 +128,11 @@ sh run_eval.sh dataset/xxx checkpoint/xxx.ckpt
└─yolov5
├─README.md
├─mindspore_hub_conf.md # config for mindspore hub
├─ascend310_infer # application for 310 inference
├─scripts
├─run_standalone_train.sh # launch standalone training(1p) in ascend
├─run_distribute_train.sh # launch distributed training(8p) in ascend
├─run_infer_310.sh # launch 310 inference in ascend
└─run_eval.sh # launch evaluating in ascend
├─src
├─__init__.py # python init file
@ -142,9 +147,9 @@ sh run_eval.sh dataset/xxx checkpoint/xxx.ckpt
├─util.py # util function
├─yolo.py # yolov5 network
├─yolo_dataset.py # create dataset for YOLOV5
├─eval.py # evaluate val results
├─export.py # export mindspore model to AIR/MINDIR
├─postprocess.py # postprocess script
└─train.py # train net
```
@ -306,14 +311,51 @@ The above python command will run in the background. You can view the results th
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.674
```
## [Convert Process](#contents)
## [Inference process](#contents)
### Convert
### Export MindIR
If you want to infer the network on Ascend 310, you should first export the model to MINDIR or AIR format:
```shell
python export.py --ckpt_file [CKPT_PATH] --file_format [EXPORT_FORMAT] --batch_size [BATCH_SIZE]
```
The `ckpt_file` parameter is required.
`EXPORT_FORMAT` should be in ["AIR", "MINDIR"].
`BATCH_SIZE` can currently only be set to 1.
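For example, a MINDIR export might look like this (the checkpoint path below is a hypothetical placeholder):

```shell
python export.py --ckpt_file ./yolov5.ckpt --file_format MINDIR --batch_size 1
```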
### Infer on Ascend310
Before performing inference, the MINDIR file must be exported by the `export.py` script. We only provide an example of inference using the MINDIR model. The current batch_size can only be set to 1.
```shell
# Ascend310 inference
bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [ANN_FILE] [DVPP] [DEVICE_ID]
```
- `ANN_FILE` is the path to the annotation file.
- `DVPP` is mandatory and must be chosen from ["DVPP", "CPU"]; it is case-insensitive. The current model only supports CPU mode.
- `DEVICE_ID` is optional; the default value is 0.
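For example (all paths below are hypothetical placeholders):

```shell
bash run_infer_310.sh ./yolov5.mindir ./coco/val2017 ./annotations/instances_val2017.json CPU 0
```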
### result
The inference result is saved in the current path; you can find results like the following in the acc.log file.
```bash
# acc.log
=============coco 310 infer result=========
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.369
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.571
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.398
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.216
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.421
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.487
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.301
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.502
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.558
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.388
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.617
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.677
```
# [Model Description](#contents)
@ -355,6 +397,20 @@ YOLOv5 on 5K images(The annotation and data format must be the same as coco val2
| Accuracy | map=36.8~37.2%(shape=640) |
| Model for inference | 58M (.ckpt file) |
### 310 Inference Performance
| Parameters | Ascend |
| ------------------- | ---------------------------------------- |
| Model Version | YOLOv5s |
| Resource | Ascend 310; CentOS 3.10 |
| Uploaded Date | 07/06/2021 (month/day/year) |
| MindSpore Version | 1.2.0 |
| Dataset | Coco2017 5K images |
| batch_size | 1 |
| outputs | box position, scores, and probability |
| Accuracy | Accuracy=0.71654 |
| Model for inference | 58M (.ckpt file) |
# [Description of Random Situation](#contents)
In dataset.py, we set the seed inside the `create_dataset` function.

View File

@ -0,0 +1,14 @@
cmake_minimum_required(VERSION 3.14.1)
project(Ascend310Infer)
add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
option(MINDSPORE_PATH "mindspore install path" "")
include_directories(${MINDSPORE_PATH})
include_directories(${MINDSPORE_PATH}/include)
include_directories(${PROJECT_SRC_ROOT})
find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
add_executable(main src/main.cc src/utils.cc)
target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags)

View File

@ -0,0 +1,23 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ ! -d out ]; then
mkdir out
fi
cd out || exit
cmake .. \
-DMINDSPORE_PATH="`pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`"
make

View File

@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INFERENCE_UTILS_H_
#define MINDSPORE_INFERENCE_UTILS_H_
#include <sys/stat.h>
#include <dirent.h>
#include <vector>
#include <string>
#include <memory>
#include "include/api/types.h"
std::vector<std::string> GetAllFiles(std::string_view dirName);
DIR *OpenDir(std::string_view dirName);
std::string RealPath(std::string_view path);
mindspore::MSTensor ReadFileToTensor(const std::string &file);
int WriteResult(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs);
#endif

View File

@ -0,0 +1,213 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/time.h>
#include <gflags/gflags.h>
#include <dirent.h>
#include <math.h>
#include <iostream>
#include <string>
#include <algorithm>
#include <iosfwd>
#include <vector>
#include <fstream>
#include <sstream>
#include "include/api/model.h"
#include "include/api/context.h"
#include "include/api/types.h"
#include "include/api/serialization.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_ascend.h"
#include "include/dataset/execute.h"
#include "include/dataset/vision.h"
#include "inc/utils.h"
using mindspore::Context;
using mindspore::Serialization;
using mindspore::Model;
using mindspore::Status;
using mindspore::ModelType;
using mindspore::GraphCell;
using mindspore::kSuccess;
using mindspore::MSTensor;
using mindspore::DataType;
using mindspore::dataset::Execute;
using mindspore::dataset::InterpolationMode;
using mindspore::dataset::TensorTransform;
using mindspore::dataset::vision::Resize;
using mindspore::dataset::vision::HWC2CHW;
using mindspore::dataset::vision::Normalize;
using mindspore::dataset::vision::Decode;
DEFINE_string(mindir_path, "", "mindir path");
DEFINE_string(dataset_path, ".", "dataset path");
DEFINE_int32(device_id, 0, "device id");
DEFINE_int32(image_height, 640, "image height");
DEFINE_int32(image_width, 640, "image width");
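// TransFormImg implements the YOLOv5 "focus" slicing on a CHW float image:
// the four pixels of every 2x2 block (even/odd rows x even/odd columns) are
// scattered into four spatially-downsampled copies concatenated along the
// channel axis, so a 3 x H x W input becomes 12 x H/2 x W/2, matching the
// input layout of the exported MindIR model.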
int TransFormImg(MSTensor *input, MSTensor *output) {
void *imgput;
void *imgoutput;
float *address_img;
float *address;
imgput = input->MutableData();
imgoutput = output->MutableData();
address_img = static_cast<float *>(imgput);
address = static_cast<float *>(imgoutput);
int new_height = static_cast<int>(FLAGS_image_height) / 2;
int new_width = static_cast<int>(FLAGS_image_width) / 2;
std::vector<int64_t> input_shape = input->Shape();
int channel = static_cast<int> (input_shape[0]);
int new_channel = channel * 4; // concatenate image
int outIdx = 0;
int imgIdx = 0;
for (int new_c = 0; new_c < new_channel; new_c++) {
int c = new_c % channel;
for (int new_h = 0; new_h < new_height; new_h++) {
for (int new_w = 0; new_w < new_width; new_w++) {
if (new_c < channel) {
outIdx = new_c * new_height * new_width + new_h * new_width + new_w;
imgIdx = c * static_cast<int>(FLAGS_image_height) * static_cast<int>(FLAGS_image_width) +
new_h * 2 * static_cast<int>(FLAGS_image_width) + new_w * 2;
} else if (channel <= new_c && new_c < channel * 2) {
outIdx = new_c * new_height * new_width + new_h * new_width + new_w;
imgIdx = c * static_cast<int>(FLAGS_image_height) * static_cast<int>(FLAGS_image_width) +
static_cast<int>((new_h + 0.5) * 2 * static_cast<int>(FLAGS_image_width)) + new_w * 2;
} else if (channel * 2 <= new_c && new_c< channel * 3) {
outIdx = new_c * new_height * new_width + new_h * new_width + new_w;
imgIdx = c * static_cast<int>(FLAGS_image_height) * static_cast<int>(FLAGS_image_width) +
new_h * 2 * static_cast<int>(FLAGS_image_width) + static_cast<int>((new_w + 0.5) * 2);
} else if (channel * 3 <= new_c && new_c< channel * 4) {
outIdx = new_c * new_height * new_width + new_h * new_width + new_w;
imgIdx = c * static_cast<int>(FLAGS_image_height) * static_cast<int>(FLAGS_image_width) +
static_cast<int>((new_h + 0.5) * 2 * static_cast<int>(FLAGS_image_width)) +
static_cast<int>((new_w + 0.5) * 2);
} else {
std::cout << "new channels Out of range." << std::endl;
return 1;
}
address[outIdx] = address_img[imgIdx];
}
}
}
return 0;
}
int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (RealPath(FLAGS_mindir_path).empty()) {
std::cout << "Invalid mindir" << std::endl;
return 1;
}
auto context = std::make_shared<Context>();
auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
ascend310->SetDeviceID(FLAGS_device_id);
ascend310->SetPrecisionMode("allow_fp32_to_fp16");
ascend310->SetOpSelectImplMode("high_precision");
ascend310->SetBufferOptimizeMode("off_optimize");
context->MutableDeviceInfo().push_back(ascend310);
mindspore::Graph graph;
Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);
Model model;
Status ret = model.Build(GraphCell(graph), context);
if (ret != kSuccess) {
std::cout << "ERROR: Build failed." << std::endl;
return 1;
}
auto all_files = GetAllFiles(FLAGS_dataset_path);
std::map<double, double> costTime_map;
size_t size = all_files.size();
std::shared_ptr<TensorTransform> decode(new Decode());
auto resize = Resize({FLAGS_image_height, FLAGS_image_width});
auto normalize = Normalize({123.675, 116.28, 103.53}, {58.395, 57.120, 57.375});
auto hwc2chw = HWC2CHW();
Execute composeDecode({decode});
for (size_t i = 0; i < size; ++i) {
struct timeval start = {0};
struct timeval end = {0};
double startTimeMs;
double endTimeMs;
std::vector<MSTensor> inputs;
std::vector<MSTensor> outputs;
auto imgDecode = MSTensor();
auto img = MSTensor();
composeDecode(ReadFileToTensor(all_files[i]), &imgDecode);
std::vector<int64_t> shape = imgDecode.Shape();
if ((static_cast<int> (shape[0]) < static_cast<int>(FLAGS_image_height)) &&
(static_cast<int> (shape[1]) < static_cast<int>(FLAGS_image_width))) {
resize = Resize({FLAGS_image_height, FLAGS_image_width}, InterpolationMode::kCubic);
} else if ((static_cast<int> (shape[0]) > static_cast<int>(FLAGS_image_height)) &&
(static_cast<int> (shape[1]) > static_cast<int>(FLAGS_image_width))) {
resize = Resize({FLAGS_image_height, FLAGS_image_width}, InterpolationMode::kNearestNeighbour);
} else {
resize = Resize({FLAGS_image_height, FLAGS_image_width}, InterpolationMode::kLinear);
}
if (shape.size() <= 2) {
std::cout << "image is not a 3-channel HWC image." << std::endl;
return 1;
}
Execute transform({resize, normalize, hwc2chw});
transform(imgDecode, &img);
size_t buffer_size = img.DataSize();
std::vector<int64_t> img_shape = img.Shape();
mindspore::MSTensor buffer("output", mindspore::DataType::kNumberTypeFloat32,
{static_cast<int64_t>(img_shape[0] * 4), static_cast<int64_t>(FLAGS_image_height) / 2,
static_cast<int64_t>(FLAGS_image_width) / 2},
nullptr, buffer_size);
TransFormImg(&img, &buffer);
std::vector<MSTensor> model_inputs = model.GetInputs();
inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
buffer.Data().get(), buffer.DataSize());
gettimeofday(&start, nullptr);
ret = model.Predict(inputs, &outputs);
gettimeofday(&end, nullptr);
if (ret != kSuccess) {
std::cout << "Predict " << all_files[i] << " failed." << std::endl;
return 1;
}
startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs));
WriteResult(all_files[i], outputs);
}
double average = 0.0;
int inferCount = 0;
for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
double diff = 0.0;
diff = iter->second - iter->first;
average += diff;
inferCount++;
}
average = average / inferCount;
std::stringstream timeCost;
timeCost << "NN inference cost average time: "<< average << " ms of infer_count " << inferCount << std::endl;
std::cout << "NN inference cost average time: "<< average << "ms of infer_count " << inferCount << std::endl;
std::string fileName = "./time_Result" + std::string("/test_perform_static.txt");
std::ofstream fileStream(fileName.c_str(), std::ios::trunc);
fileStream << timeCost.str();
fileStream.close();
costTime_map.clear();
return 0;
}

View File

@ -0,0 +1,130 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "inc/utils.h"
#include <fstream>
#include <algorithm>
#include <iostream>
using mindspore::MSTensor;
using mindspore::DataType;
std::vector<std::string> GetAllFiles(std::string_view dirName) {
struct dirent *filename;
DIR *dir = OpenDir(dirName);
if (dir == nullptr) {
return {};
}
std::vector<std::string> res;
while ((filename = readdir(dir)) != nullptr) {
std::string dName = std::string(filename->d_name);
if (dName == "." || dName == ".." || filename->d_type != DT_REG) {
continue;
}
res.emplace_back(std::string(dirName) + "/" + filename->d_name);
}
std::sort(res.begin(), res.end());
for (auto &f : res) {
std::cout << "image file: " << f << std::endl;
}
return res;
}
int WriteResult(const std::string& imageFile, const std::vector<MSTensor> &outputs) {
std::string homePath = "./result_Files";
for (size_t i = 0; i < outputs.size(); ++i) {
size_t outputSize;
std::shared_ptr<const void> netOutput;
netOutput = outputs[i].Data();
outputSize = outputs[i].DataSize();
int pos = imageFile.rfind('/');
std::string fileName(imageFile, pos + 1);
fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin");
std::string outFileName = homePath + "/" + fileName;
FILE * outputFile = fopen(outFileName.c_str(), "wb");
fwrite(netOutput.get(), outputSize, sizeof(char), outputFile);
fclose(outputFile);
outputFile = nullptr;
}
return 0;
}
mindspore::MSTensor ReadFileToTensor(const std::string &file) {
if (file.empty()) {
std::cout << "Pointer file is nullptr" << std::endl;
return mindspore::MSTensor();
}
std::ifstream ifs(file);
if (!ifs.good()) {
std::cout << "File: " << file << " is not exist" << std::endl;
return mindspore::MSTensor();
}
if (!ifs.is_open()) {
std::cout << "File: " << file << "open failed" << std::endl;
return mindspore::MSTensor();
}
ifs.seekg(0, std::ios::end);
size_t size = ifs.tellg();
mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size);
ifs.seekg(0, std::ios::beg);
ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
ifs.close();
return buffer;
}
DIR *OpenDir(std::string_view dirName) {
if (dirName.empty()) {
std::cout << " dirName is null ! " << std::endl;
return nullptr;
}
std::string realPath = RealPath(dirName);
struct stat s;
lstat(realPath.c_str(), &s);
if (!S_ISDIR(s.st_mode)) {
std::cout << "dirName is not a valid directory !" << std::endl;
return nullptr;
}
DIR *dir;
dir = opendir(realPath.c_str());
if (dir == nullptr) {
std::cout << "Can not open dir " << dirName << std::endl;
return nullptr;
}
std::cout << "Successfully opened the dir " << dirName << std::endl;
return dir;
}
std::string RealPath(std::string_view path) {
char realPathMem[PATH_MAX] = {0};
char *realPathRet = nullptr;
realPathRet = realpath(path.data(), realPathMem);
if (realPathRet == nullptr) {
std::cout << "File: " << path << " is not exist.";
return "";
}
std::string realPath(realPathMem);
std::cout << path << " realpath is: " << realPath << std::endl;
return realPath;
}

View File

@ -18,8 +18,8 @@ import numpy as np
import mindspore
from mindspore import context, Tensor
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
from src.yolo import YOLOV5s
from src.config import ConfigYOLOV5
from src.yolo import YOLOV5s_Infer
parser = argparse.ArgumentParser(description='yolov5 export')
parser.add_argument("--device_id", type=int, default=0, help="Device id")
@ -27,7 +27,7 @@ parser.add_argument("--batch_size", type=int, default=1, help="batch size")
parser.add_argument("--testing_shape", type=int, default=640, help="test shape")
parser.add_argument("--ckpt_file", type=str, required=True, help="Checkpoint file path.")
parser.add_argument("--file_name", type=str, default="yolov5", help="output file name.")
parser.add_argument('--file_format', type=str, choices=["AIR", "ONNX", "MINDIR"], default='AIR', help='file format')
parser.add_argument('--file_format', type=str, choices=["AIR", "MINDIR"], default='AIR', help='file format')
parser.add_argument("--device_target", type=str, choices=["Ascend", "GPU", "CPU"], default="Ascend",
help="device target")
args = parser.parse_args()
@ -37,14 +37,16 @@ if args.device_target == "Ascend":
context.set_context(device_id=args.device_id)
if __name__ == "__main__":
ts_shape = args.testing_shape
config = ConfigYOLOV5()
if args.testing_shape:
config.test_img_shape = [int(args.testing_shape), int(args.testing_shape)]
ts_shape = config.test_img_shape[0]
network = YOLOV5s(is_training=False)
network.set_train(False)
network = YOLOV5s_Infer(config.test_img_shape)
param_dict = load_checkpoint(args.ckpt_file)
load_param_into_net(network, param_dict)
input_data = Tensor(np.zeros([args.batch_size, 3, ts_shape, ts_shape]), mindspore.float32)
input_data = Tensor(np.zeros([args.batch_size, 12, int(ts_shape/2), int(ts_shape/2)]), mindspore.float32)
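# the exported network consumes the focus-sliced image: 3 channels become 12 at half the spatial resolution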
export(network, input_data, file_name=args.file_name, file_format=args.file_format)

View File

@ -0,0 +1,328 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""YoloV5 310 infer."""
import os
import sys
import argparse
import datetime
import time
import ast
from collections import defaultdict
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from src.logger import get_logger
parser = argparse.ArgumentParser('yolov5 postprocess')
# dataset related
parser.add_argument('--per_batch_size', default=1, type=int, help='batch size per device')
# logging related
parser.add_argument('--log_path', type=str, default='outputs/', help='checkpoint save location')
# detect_related
parser.add_argument('--nms_thresh', type=float, default=0.6, help='threshold for NMS')
parser.add_argument('--ann_file', type=str, default='', help='path to annotation')
parser.add_argument('--ignore_threshold', type=float, default=0.001, help='threshold to throw low quality boxes')
parser.add_argument('--dataset_path', type=str, default='', help='path of image dataset')
parser.add_argument('--result_files', type=str, default='./result_Files', help='path to 310 infer result path')
parser.add_argument('--multi_label', type=ast.literal_eval, default=True, help='whether to use multi label')
parser.add_argument('--multi_label_thresh', type=float, default=0.1, help='class probability threshold in multi-label mode')
args, _ = parser.parse_known_args()
class Redirct:
def __init__(self):
self.content = ""
def write(self, content):
self.content += content
def flush(self):
self.content = ""
class DetectionEngine:
"""Detection engine."""
def __init__(self, args_detection):
self.ignore_threshold = args_detection.ignore_threshold
self.labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
self.num_classes = len(self.labels)
self.results = {}
self.file_path = ''
self.save_prefix = args_detection.outputs_dir
self.ann_file = args_detection.ann_file
self._coco = COCO(self.ann_file)
self._img_ids = list(sorted(self._coco.imgs.keys()))
self.det_boxes = []
self.nms_thresh = args_detection.nms_thresh
self.multi_label = args_detection.multi_label
self.multi_label_thresh = args_detection.multi_label_thresh
# self.coco_catids = self._coco.getCatIds()
self.coco_catIds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27,
28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 84, 85, 86, 87, 88, 89, 90]
def do_nms_for_results(self):
"""Get result boxes."""
for image_id in self.results:
for clsi in self.results[image_id]:
dets = self.results[image_id][clsi]
dets = np.array(dets)
keep_index = self._diou_nms(dets, thresh=self.nms_thresh)
keep_box = [{'image_id': int(image_id),
'category_id': int(clsi),
'bbox': list(dets[i][:4].astype(float)),
'score': dets[i][4].astype(float)}
for i in keep_index]
self.det_boxes.extend(keep_box)
def _nms(self, predicts, threshold):
"""Calculate NMS."""
# convert xywh -> xmin ymin xmax ymax
x1 = predicts[:, 0]
y1 = predicts[:, 1]
x2 = x1 + predicts[:, 2]
y2 = y1 + predicts[:, 3]
scores = predicts[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
reserved_boxes = []
while order.size > 0:
i = order[0]
reserved_boxes.append(i)
max_x1 = np.maximum(x1[i], x1[order[1:]])
max_y1 = np.maximum(y1[i], y1[order[1:]])
min_x2 = np.minimum(x2[i], x2[order[1:]])
min_y2 = np.minimum(y2[i], y2[order[1:]])
intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1)
intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1)
intersect_area = intersect_w * intersect_h
ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area)
indexes = np.where(ovr <= threshold)[0]
order = order[indexes + 1]
return reserved_boxes
def _diou_nms(self, dets, thresh=0.5):
"""Calculate DIoU NMS; boxes come in as xywh and are converted to xmin ymin xmax ymax."""
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = x1 + dets[:, 2]
y2 = y1 + dets[:, 3]
scores = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
center_x1 = (x1[i] + x2[i]) / 2
center_x2 = (x1[order[1:]] + x2[order[1:]]) / 2
center_y1 = (y1[i] + y2[i]) / 2
center_y2 = (y1[order[1:]] + y2[order[1:]]) / 2
inter_diag = (center_x2 - center_x1) ** 2 + (center_y2 - center_y1) ** 2
out_max_x = np.maximum(x2[i], x2[order[1:]])
out_max_y = np.maximum(y2[i], y2[order[1:]])
out_min_x = np.minimum(x1[i], x1[order[1:]])
out_min_y = np.minimum(y1[i], y1[order[1:]])
outer_diag = (out_max_x - out_min_x) ** 2 + (out_max_y - out_min_y) ** 2
diou = ovr - inter_diag / outer_diag
diou = np.clip(diou, -1, 1)
inds = np.where(diou <= thresh)[0]
order = order[inds + 1]
return keep
def write_result(self):
"""Save result to file."""
import json
t = datetime.datetime.now().strftime('_%Y_%m_%d_%H_%M_%S')
try:
self.file_path = self.save_prefix + '/predict' + t + '.json'
f = open(self.file_path, 'w')
json.dump(self.det_boxes, f)
except IOError as e:
raise RuntimeError("Unable to open json file to dump. What(): {}".format(str(e)))
else:
f.close()
return self.file_path
def get_eval_result(self):
"""Get eval result."""
coco_gt = COCO(self.ann_file)
coco_dt = coco_gt.loadRes(self.file_path)
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
rdct = Redirct()
stdout = sys.stdout
sys.stdout = rdct
coco_eval.summarize()
sys.stdout = stdout
return rdct.content
def detect(self, outputs, batch, img_shape, image_id):
"""Detect boxes."""
outputs_num = len(outputs)
# each output: [batch, grid_h, grid_w, 3, 85]
for batch_id in range(batch):
for out_id in range(outputs_num):
# 32, 52, 52, 3, 85
out_item = outputs[out_id]
# 52, 52, 3, 85
out_item_single = out_item[batch_id, :]
# get number of items in one head, [B, gx, gy, anchors, 5+80]
dimensions = out_item_single.shape[:-1]
out_num = 1
for d in dimensions:
out_num *= d
ori_w, ori_h = img_shape[batch_id]
img_id = int(image_id[batch_id])
x = out_item_single[..., 0] * ori_w
y = out_item_single[..., 1] * ori_h
w = out_item_single[..., 2] * ori_w
h = out_item_single[..., 3] * ori_h
conf = out_item_single[..., 4:5]
cls_emb = out_item_single[..., 5:]
cls_argmax = np.expand_dims(np.argmax(cls_emb, axis=-1), axis=-1)
x = x.reshape(-1)
y = y.reshape(-1)
w = w.reshape(-1)
h = h.reshape(-1)
x_top_left = x - w / 2.
y_top_left = y - h / 2.
cls_emb = cls_emb.reshape(-1, self.num_classes)
if self.multi_label:
conf = conf.reshape(-1, 1)
# per-class confidence: class probability scaled by objectness
confidence = cls_emb * conf
flag = cls_emb > self.multi_label_thresh
flag = flag.nonzero()
for index in range(len(flag[0])):
i = flag[0][index]
j = flag[1][index]
confi = confidence[i][j]
if confi < self.ignore_threshold:
continue
if img_id not in self.results:
self.results[img_id] = defaultdict(list)
x_lefti = max(0, x_top_left[i])
y_lefti = max(0, y_top_left[i])
wi = min(w[i], ori_w)
hi = min(h[i], ori_h)
clsi = j
# transform catId to match coco
coco_clsi = self.coco_catIds[clsi]
self.results[img_id][coco_clsi].append([x_lefti, y_lefti, wi, hi, confi])
else:
cls_argmax = np.expand_dims(np.argmax(cls_emb, axis=-1), axis=-1)
conf = conf.reshape(-1)
cls_argmax = cls_argmax.reshape(-1)
# all-False mask; mark only the argmax class for each box
flag = np.zeros(cls_emb.shape, dtype=np.bool_)
for i in range(flag.shape[0]):
c = cls_argmax[i]
flag[i, c] = True
confidence = cls_emb[flag] * conf
for x_lefti, y_lefti, wi, hi, confi, clsi in zip(x_top_left, y_top_left, w, h, confidence,
cls_argmax):
if confi < self.ignore_threshold:
continue
if img_id not in self.results:
self.results[img_id] = defaultdict(list)
x_lefti = max(0, x_lefti)
y_lefti = max(0, y_lefti)
wi = min(wi, ori_w)
hi = min(hi, ori_h)
# transform catId to match coco
coco_clsi = self.coco_catIds[clsi]
self.results[img_id][coco_clsi].append([x_lefti, y_lefti, wi, hi, confi])
if __name__ == "__main__":
start_time = time.time()
args.outputs_dir = os.path.join(args.log_path,
datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
args.logger = get_logger(args.outputs_dir, 0)
# init detection engine
detection = DetectionEngine(args)
coco = COCO(args.ann_file)
result_path = args.result_files
files = os.listdir(args.dataset_path)
for file in files:
img_ids_name = file.split('.')[0]
img_id_ = int(np.squeeze(img_ids_name))
imgIds = coco.getImgIds(imgIds=[img_id_])
img = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]
image_shape = ((img['width'], img['height']),)
img_id_ = (np.squeeze(img_ids_name),)
result_path_0 = os.path.join(result_path, img_ids_name + "_0.bin")
result_path_1 = os.path.join(result_path, img_ids_name + "_1.bin")
result_path_2 = os.path.join(result_path, img_ids_name + "_2.bin")
output_small = np.fromfile(result_path_0, dtype=np.float32).reshape(1, 20, 20, 3, 85)
output_me = np.fromfile(result_path_1, dtype=np.float32).reshape(1, 40, 40, 3, 85)
output_big = np.fromfile(result_path_2, dtype=np.float32).reshape(1, 80, 80, 3, 85)
detection.detect([output_small, output_me, output_big], args.per_batch_size, image_shape, img_id_)
args.logger.info('Calculating mAP...')
detection.do_nms_for_results()
result_file_path = detection.write_result()
args.logger.info('result file path: {}'.format(result_file_path))
eval_result = detection.get_eval_result()
cost_time = time.time() - start_time
args.logger.info('\n=============coco 310 infer result=========\n' + eval_result)
args.logger.info('testing cost time {:.2f}h'.format(cost_time / 3600.))

View File

@ -0,0 +1,108 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [[ $# -lt 4 || $# -gt 5 ]]; then
echo "Usage: bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [ANN_FILE] [DVPP] [DEVICE_ID]
DVPP is mandatory, and must choose from [DVPP|CPU], it's case-insensitive
DEVICE_ID is optional, it can be set by environment variable device_id, otherwise the value is zero"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
model=$(get_real_path $1)
data_path=$(get_real_path $2)
ann_file=$(get_real_path $3)
DVPP=${4^^}
device_id=0
if [ $# == 5 ]; then
device_id=$5
fi
echo "mindir name: "$model
echo "dataset path: "$data_path
echo "ann file: "$ann_file
echo "image process mode: "$DVPP
echo "device id: "$device_id
export ASCEND_HOME=/usr/local/Ascend/
if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
else
export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
export ASCEND_OPP_PATH=$ASCEND_HOME/opp
fi
function compile_app()
{
cd ../ascend310_infer || exit
bash build.sh &> build.log
}
function infer()
{
cd - || exit
if [ -d result_Files ]; then
rm -rf ./result_Files
fi
if [ -d time_Result ]; then
rm -rf ./time_Result
fi
mkdir result_Files
mkdir time_Result
if [ "$DVPP" == "DVPP" ];then
echo "Only support CPU mode"
exit 1
elif [ "$DVPP" == "CPU" ]; then
../ascend310_infer/out/main --mindir_path=$model --dataset_path=$data_path --device_id=$device_id --image_height=640 --image_width=640 &> infer.log
else
echo "image process mode must be in [DVPP|CPU]"
exit 1
fi
}
function cal_acc()
{
python3.7 ../postprocess.py --result_files=./result_Files --dataset_path=$data_path --ann_file=$ann_file &> acc.log
}
compile_app
if [ $? -ne 0 ]; then
echo "compile app code failed"
exit 1
fi
infer
if [ $? -ne 0 ]; then
echo " execute inference failed"
exit 1
fi
cal_acc
if [ $? -ne 0 ]; then
echo "calculate accuracy failed"
exit 1
fi

View File

@ -35,14 +35,14 @@ class YOLOv5(nn.Cell):
self.out_channel = out_channel
self.backbone = backbone
self.conv1 = Conv(512, 256, k=1, s=1)#10
self.C31 = C3(512, 256, n=1, shortcut=False)#11
self.conv1 = Conv(512, 256, k=1, s=1) # 10
self.C31 = C3(512, 256, n=1, shortcut=False) # 11
self.conv2 = Conv(256, 128, k=1, s=1)
self.C32 = C3(256, 128, n=1, shortcut=False)#13
self.C32 = C3(256, 128, n=1, shortcut=False) # 13
self.conv3 = Conv(128, 128, k=3, s=2)
self.C33 = C3(256, 256, n=1, shortcut=False)#15
self.C33 = C3(256, 256, n=1, shortcut=False) # 15
self.conv4 = Conv(256, 256, k=3, s=2)
self.C34 = C3(512, 512, n=1, shortcut=False)#17
self.C34 = C3(512, 512, n=1, shortcut=False) # 17
self.backblock1 = YoloBlock(128, 255)
self.backblock2 = YoloBlock(256, 255)
@ -62,24 +62,24 @@ class YOLOv5(nn.Cell):
backbone4, backbone6, backbone9 = self.backbone(x)
cv1 = self.conv1(backbone9)#10
cv1 = self.conv1(backbone9) # 10
ups1 = P.ResizeNearestNeighbor((img_hight / 16, img_width / 16))(cv1)
concat1 = self.concat((ups1, backbone6))
bcsp1 = self.C31(concat1)#13
bcsp1 = self.C31(concat1) # 13
cv2 = self.conv2(bcsp1)
ups2 = P.ResizeNearestNeighbor((img_hight / 8, img_width / 8))(cv2)#15
ups2 = P.ResizeNearestNeighbor((img_hight / 8, img_width / 8))(cv2) # 15
concat2 = self.concat((ups2, backbone4))
bcsp2 = self.C32(concat2)#17
bcsp2 = self.C32(concat2) # 17
cv3 = self.conv3(bcsp2)
concat3 = self.concat((cv3, cv2))
bcsp3 = self.C33(concat3)#20
bcsp3 = self.C33(concat3) # 20
cv4 = self.conv4(bcsp3)
concat4 = self.concat((cv4, cv1))
bcsp4 = self.C34(concat4)#23
small_object_output = self.backblock1(bcsp2) # h/8, w/8
medium_object_output = self.backblock2(bcsp3) # h/16, w/16
big_object_output = self.backblock3(bcsp4) # h/32, w/32
bcsp4 = self.C34(concat4) # 23
small_object_output = self.backblock1(bcsp2) # h/8, w/8
medium_object_output = self.backblock2(bcsp3) # h/16, w/16
big_object_output = self.backblock3(bcsp4) # h/32, w/32
return small_object_output, medium_object_output, big_object_output
@ -98,6 +98,7 @@ class YoloBlock(nn.Cell):
YoloBlock(12, 255)
"""
def __init__(self, in_channels, out_channels):
super(YoloBlock, self).__init__()
@ -145,7 +146,7 @@ class DetectionBlock(nn.Cell):
raise KeyError("Invalid scale value for DetectionBlock")
self.anchors = Tensor([self.config.anchor_scales[i] for i in idx], ms.float32)
self.num_anchors_per_scale = 3
self.num_attrib = 4+1+self.config.num_classes
self.num_attrib = 4 + 1 + self.config.num_classes
self.lambda_coord = 1
self.sigmoid = nn.Sigmoid()
@ -199,6 +200,7 @@ class DetectionBlock(nn.Cell):
class Iou(nn.Cell):
"""Calculate the iou of boxes"""
def __init__(self):
super(Iou, self).__init__()
self.min = P.Minimum()
@ -212,8 +214,8 @@ class Iou(nn.Cell):
"""
box1_xy = box1[:, :, :, :, :, :2]
box1_wh = box1[:, :, :, :, :, 2:4]
box1_mins = box1_xy - box1_wh / F.scalar_to_array(2.0) # topLeft
box1_maxs = box1_xy + box1_wh / F.scalar_to_array(2.0) # rightDown
box1_mins = box1_xy - box1_wh / F.scalar_to_array(2.0) # topLeft
box1_maxs = box1_xy + box1_wh / F.scalar_to_array(2.0) # rightDown
box2_xy = box2[:, :, :, :, :, :2]
box2_wh = box2[:, :, :, :, :, 2:4]
@ -237,6 +239,7 @@ class YoloLossBlock(nn.Cell):
"""
Loss block cell of YOLOV5 network.
"""
def __init__(self, scale, config=ConfigYOLOV5()):
super(YoloLossBlock, self).__init__()
self.config = config
@ -356,8 +359,23 @@ class YOLOV5s(nn.Cell):
return output_big, output_me, output_small
class YOLOV5s_Infer(nn.Cell):
"""
YOLOV5 Infer.
"""
def __init__(self, inputshape):
super(YOLOV5s_Infer, self).__init__()
self.network = YOLOV5s(is_training=False)
self.inputshape = inputshape
def construct(self, x):
return self.network(x, self.inputshape)
class YoloWithLossCell(nn.Cell):
"""YOLOV5 loss."""
def __init__(self, network):
super(YoloWithLossCell, self).__init__()
self.yolo_network = network
@ -367,7 +385,6 @@ class YoloWithLossCell(nn.Cell):
self.loss_small = YoloLossBlock('s', self.config)
self.tenser_to_array = P.TupleToArray()
def construct(self, x, y_true_0, y_true_1, y_true_2, gt_0, gt_1, gt_2, input_shape):
input_shape = F.shape(x)[2:4]
input_shape = F.cast(self.tenser_to_array(input_shape) * 2, ms.float32)
@ -381,6 +398,7 @@ class YoloWithLossCell(nn.Cell):
class TrainingWrapper(nn.Cell):
"""Training wrapper."""
def __init__(self, network, optimizer, sens=1.0):
super(TrainingWrapper, self).__init__(auto_prefix=False)
self.network = network
@ -414,6 +432,7 @@ class TrainingWrapper(nn.Cell):
class Giou(nn.Cell):
"""Calculating giou"""
def __init__(self):
super(Giou, self).__init__()
self.cast = P.Cast()
@ -449,6 +468,7 @@ class Giou(nn.Cell):
giou = C.clip_by_value(giou, -1.0, 1.0)
return giou
def xywh2x1y1x2y2(box_xywh):
boxes_x1 = box_xywh[..., 0:1] - box_xywh[..., 2:3] / 2
boxes_y1 = box_xywh[..., 1:2] - box_xywh[..., 3:4] / 2