forked from mindspore-Ecosystem/mindspore
add post training quantization of ssd
This commit is contained in:
parent
98bc1c93a3
commit
9240837adf
|
@ -23,6 +23,7 @@
|
|||
- [Export MindIR](#export-mindir)
|
||||
- [Infer on Ascend310](#infer-on-ascend310)
|
||||
- [result](#result)
|
||||
- [Post Training Quantization](#post-training-quantization)
|
||||
- [Model Description](#model-description)
|
||||
- [Performance](#performance)
|
||||
- [Evaluation Performance](#evaluation-performance)
|
||||
|
@ -541,6 +542,52 @@ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.659
|
|||
mAP: 0.33880018942412393
|
||||
```
|
||||
|
||||
### [Post Training Quantization](#contents)
|
||||
|
||||
The relevant scripts reside in the "ascend310_quant_infer" directory. Please perform the following steps in order to complete post training quantization.
|
||||
The current quantization project is based on the COCO2017 dataset.
|
||||
|
||||
1. Generate the .bin format data required for AIR model inference on the Ascend310 platform.
|
||||
|
||||
```shell
|
||||
python export_bin.py --config_path [YAML CONFIG PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH]
|
||||
```
|
||||
|
||||
2. Export quantized AIR model.
|
||||
|
||||
Exporting a quantized AIR model requires a dedicated post training quantization toolkit. Please refer to the [official website](https://www.hiascend.com/software/cann/community).
|
||||
|
||||
```shell
|
||||
python post_quant.py --config_path [YAML CONFIG PATH] --checkpoint_path [CKPT_PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH]
|
||||
```
|
||||
|
||||
The quantized AIR file will be stored as "./results/ssd_quant.air".
|
||||
|
||||
3. Run inference on the Ascend310 platform.
|
||||
|
||||
```shell
|
||||
# Ascend310 quant inference
|
||||
bash run_quant_infer.sh [AIR_PATH] [IMAGE_DATA] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE]
|
||||
```
|
||||
|
||||
The inference result is saved in the current path; you can find a result like the following in the acc.log file.
|
||||
|
||||
```bash
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.237
|
||||
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.386
|
||||
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.240
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.042
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.200
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.425
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.255
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.404
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.136
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.707
|
||||
mAP: 0.23657619676441116
|
||||
```
|
||||
|
||||
## [Model Description](#contents)
|
||||
|
||||
### [Performance](#contents)
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
- [导出MindIR](#导出mindir)
|
||||
- [在Ascend310执行推理](#在ascend310执行推理)
|
||||
- [结果](#结果)
|
||||
- [训练后量化推理](#训练后量化推理)
|
||||
- [模型描述](#模型描述)
|
||||
- [性能](#性能)
|
||||
- [评估性能](#评估性能)
|
||||
|
@ -463,6 +464,51 @@ Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.659
|
|||
mAP: 0.33880018942412393
|
||||
```
|
||||
|
||||
### [训练后量化推理](#contents)
|
||||
|
||||
训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下,依次执行以下步骤实现训练后量化推理。本训练后量化工程基于COCO2017数据集。
|
||||
|
||||
1、生成Ascend310平台AIR模型推理需要的.bin格式数据。
|
||||
|
||||
```shell
|
||||
python export_bin.py --config_path [YAML CONFIG PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH]
|
||||
```
|
||||
|
||||
2、导出训练后量化的AIR格式模型。
|
||||
|
||||
导出训练后量化模型需要配套的量化工具包,参考[官方地址](https://www.hiascend.com/software/cann/community)
|
||||
|
||||
```shell
|
||||
python post_quant.py --config_path [YAML CONFIG PATH] --checkpoint_path [CKPT_PATH] --coco_root [COCO DATA DIR] --mindrecord_dir [MINDRECORD PATH]
|
||||
```
|
||||
|
||||
导出的模型会存储在./results/ssd_quant.air。
|
||||
|
||||
3、在Ascend310执行推理量化模型。
|
||||
|
||||
```shell
|
||||
# Ascend310 quant inference
|
||||
bash run_quant_infer.sh [AIR_PATH] [IMAGE_DATA] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE]
|
||||
```
|
||||
|
||||
推理结果保存在脚本执行的当前路径,可以在acc.log中看到精度计算结果。
|
||||
|
||||
```bash
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.237
|
||||
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.386
|
||||
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.240
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.042
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.200
|
||||
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.425
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.255
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.404
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.441
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.136
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455
|
||||
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.707
|
||||
mAP: 0.23657619676441116
|
||||
```
|
||||
|
||||
# 模型描述
|
||||
|
||||
## 性能
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""postprocess for 310 inference"""
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import numpy as np
|
||||
from pycocotools.coco import COCO
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
|
||||
|
||||
# Command-line interface of the post-processing script. Arguments are parsed at
# import time and exposed through the module-level `args` namespace.
parser = argparse.ArgumentParser("ssd quant postprocess")

# Mandatory string options describing where the inference artefacts live.
for _flag, _help in (
        ("--result_path", "path to inference results."),
        ("--image_shape", "path to image shape directory."),
        ("--image_id", "path to image id directory."),
        ("--ann_file", "path to annotation file."),
):
    parser.add_argument(_flag, type=str, required=True, help=_help)

# Optional tuning knobs of the detection post-processing.
parser.add_argument("--min_score", default=0.1, type=float, help="min box score threshold.")
parser.add_argument("--nms_threshold", default=0.6, type=float, help="threshold of nms process.")
parser.add_argument("--max_boxes", default=100, type=int, help="max number of detection boxes.")

# Unknown options are tolerated and collected in `_`.
args, _ = parser.parse_known_args()
|
||||
|
||||
|
||||
def apply_nms(all_boxes, all_scores, thres, max_boxes):
    """Greedy non-maximum suppression over a set of boxes.

    Args:
        all_boxes: 2-D array whose columns 0..3 are read as (y1, x1, y2, x2).
        all_scores: 1-D array holding one score per box.
        thres: a candidate is discarded when its IoU with an already kept box
            is greater than this value.
        max_boxes: selection stops as soon as this many boxes have been kept.

    Returns:
        list of indices into `all_boxes`, ordered by descending score.
    """
    top, left, bottom, right = (all_boxes[:, col] for col in range(4))
    # The "+ 1" treats coordinates as inclusive pixel indices.
    box_areas = (right - left + 1) * (bottom - top + 1)

    remaining = all_scores.argsort()[::-1]
    selected = []

    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)
        if len(selected) >= max_boxes:
            break

        # Intersection rectangle between the best box and every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        overlap = inter_w * inter_h

        iou = overlap / (box_areas[best] + box_areas[rest] - overlap)

        # Only candidates that do not overlap too much survive to the next round.
        remaining = rest[np.where(iou <= thres)[0]]
    return selected
|
||||
|
||||
|
||||
def metrics(pred_data, anno_json):
    """Evaluate predicted boxes against COCO annotations.

    Args:
        pred_data: iterable of dicts with the keys 'boxes', 'box_scores',
            'img_id' and 'image_shape' (height, width).
        anno_json: path of the COCO annotation file.

    Returns:
        The first entry of the COCOeval statistics (E.stats[0]).

    Side effects:
        Writes all detections to 'predictions.json' in the current directory.
    """
    # Class names in network output order; index 0 is the background class.
    val_cls = ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
               'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors',
               'teddy bear', 'hair drier', 'toothbrush']

    coco_gt = COCO(anno_json)
    # Category name -> category id as declared by the annotation file.
    name_to_cat_id = {cat["name"]: cat["id"] for cat in coco_gt.loadCats(coco_gt.getCatIds())}

    predictions = []
    img_ids = []

    for sample in pred_data:
        boxes = sample['boxes']
        scores = sample['box_scores']
        img_id = sample['img_id']
        height, width = sample['image_shape']
        # Boxes are multiplied by the image size in (y, x, y, x) column order.
        scale = [height, width, height, width]
        img_ids.append(img_id)

        # Class 0 (background) is never reported.
        for cls_idx in range(1, len(val_cls)):
            cls_scores = scores[:, cls_idx]
            selected = cls_scores > args.min_score
            cls_scores = cls_scores[selected]
            cls_boxes = boxes[selected] * scale

            if selected.any():
                kept = apply_nms(cls_boxes, cls_scores, args.nms_threshold, args.max_boxes)
                cls_boxes = cls_boxes[kept]
                cls_scores = cls_scores[kept]

            category_id = name_to_cat_id[val_cls[cls_idx]]
            for box, score in zip(cls_boxes.tolist(), cls_scores.tolist()):
                # Convert (y1, x1, y2, x2) into [x, y, width, height].
                predictions.append({
                    'image_id': img_id,
                    'bbox': [box[1], box[0], box[3] - box[1], box[2] - box[0]],
                    'score': score,
                    'category_id': category_id,
                })

    with open('predictions.json', 'w') as f:
        json.dump(predictions, f)

    coco_dt = coco_gt.loadRes('predictions.json')
    evaluator = COCOeval(coco_gt, coco_dt, iouType='bbox')
    evaluator.params.imgIds = img_ids
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()
    return evaluator.stats[0]
|
||||
|
||||
|
||||
def calculate_acc(result_path, image_id):
    """
    Gather the SSD inference outputs and print the resulting mAP.

    For every file in `image_id`, the matching image-shape file (from
    `args.image_shape`) and the two inference outputs
    `<stem>_output_0.bin` (boxes) and `<stem>_output_1.bin` (scores) are read,
    then everything is passed to `metrics` together with `args.ann_file`.

    Args:
        result_path (str): the directory of inference results.
        image_id (str): the path of image_id directory.
    """
    def load_output(stem, index, columns):
        # Each output is stored as raw float32 and reshaped to 1917 rows.
        path = os.path.join(result_path, stem + "_output_{}.bin".format(index))
        return np.fromfile(path, dtype=np.float32).reshape(1917, columns)

    pred_data = []
    for file in os.listdir(image_id):
        id_num = int(np.fromfile(os.path.join(image_id, file), dtype=np.int32)[0])
        img_size = np.fromfile(os.path.join(args.image_shape, file), dtype=np.float32).reshape(1, -1)[0]
        # Output files are named after the part of the file name before the first dot.
        stem = file.split(".")[0]
        pred_data.append({
            "boxes": load_output(stem, 0, 4),
            "box_scores": load_output(stem, 1, 81),
            "img_id": id_num,
            "image_shape": np.array([img_size[0], img_size[1]]),
        })

    mAP = metrics(pred_data, args.ann_file)
    print(f" mAP:{mAP}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: evaluate the results described by the parsed arguments.
    calculate_acc(result_path=args.result_path, image_id=args.image_id)
|
|
@ -0,0 +1 @@
|
|||
do_fusion:false
|
|
@ -0,0 +1,57 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""generate data and label needed for AIR model inference"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def generate_data():
    """
    Dump the evaluation dataset as .bin files for AIR model inference on Ascend310.

    Three directories are created below `<cwd>/data`; for every dataset item the
    image id, the image tensor and the image shape are written to a file with
    the same name in the respective directory.
    """
    batch_size = 1
    mindrecord_file = create_mindrecord("coco", "ssd_eval.mindrecord", False)
    ds = create_ssd_dataset(mindrecord_file, batch_size=batch_size, repeat_num=1, is_training=False,
                            use_multiprocessing=False)

    cur_dir = os.getcwd()
    image_path = os.path.join(cur_dir, "./data/00_image_data")
    img_id_path = os.path.join(cur_dir, "./data/01_image_id")
    img_shape_path = os.path.join(cur_dir, "./data/02_image_shape")
    for out_dir in (image_path, img_id_path, img_shape_path):
        os.makedirs(out_dir, exist_ok=True)

    total = ds.get_dataset_size()
    for iter_num, data in enumerate(ds.create_dict_iterator(output_numpy=True, num_epochs=1)):
        # One file name is shared by the three outputs of a dataset item.
        file_name = "coco_bs_" + str(batch_size) + "_" + str(iter_num) + ".bin"
        img_id = data["img_id"]
        img_np = data["image"]
        img_shape = data["image_shape"]
        img_id.tofile(os.path.join(img_id_path, file_name))
        img_np.tofile(os.path.join(image_path, file_name))
        img_shape.tofile(os.path.join(img_shape_path, file_name))
    print("total images num: ", total)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The project package `src` is expected one directory above this script.
    sys.path.append("..")
    from src.dataset import create_mindrecord, create_ssd_dataset

    generate_data()
|
|
@ -0,0 +1,111 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include "../inc/utils.h"
|
||||
#include "acl/acl.h"
|
||||
|
||||
/**
|
||||
* ModelProcess
|
||||
*/
|
||||
class ModelProcess {
 public:
  // Declares the model lifecycle steps (load, describe, create input/output,
  // execute, dump, release) on top of the ACL `aclmdl*` types.
  // NOTE(review): the implementations are not part of this header; the notes
  // below only restate what the declarations themselves show.

  /**
   * @brief Constructor
   */
  ModelProcess();

  /**
   * @brief Destructor
   */
  ~ModelProcess();

  /**
   * @brief load model from file with mem
   * @param [in] modelPath: model path
   * @return result (SUCCESS or FAILED)
   */
  Result LoadModelFromFileWithMem(const char *modelPath);

  /**
   * @brief unload model
   */
  void Unload();

  /**
   * @brief create model desc
   * @return result (SUCCESS or FAILED)
   */
  Result CreateDesc();

  /**
   * @brief destroy desc
   */
  void DestroyDesc();

  /**
   * @brief create model input
   * @param [in] inputDataBuffer: input buffer
   * @param [in] bufferSize: input buffer size
   * @return result (SUCCESS or FAILED)
   */
  Result CreateInput(void *inputDataBuffer, size_t bufferSize);

  /**
   * @brief destroy input resource
   */
  void DestroyInput();

  /**
   * @brief create output buffer
   * @return result (SUCCESS or FAILED)
   */
  Result CreateOutput();

  /**
   * @brief destroy output resource
   */
  void DestroyOutput();

  /**
   * @brief model execute
   * @return result (SUCCESS or FAILED)
   */
  Result Execute();

  /**
   * @brief dump model output result to file
   * @param [in] output_name: presumably the name (or prefix) of the dumped
   *        output files -- TODO confirm against the implementation
   */
  void DumpModelOutputResult(char *output_name);

  /**
   * @brief get model output result
   */
  void OutputModelResult();

 private:
  uint32_t modelId_;        // identifier of the model -- presumably assigned on load, confirm in the .cpp
  size_t modelMemSize_;     // size associated with modelMemPtr_
  size_t modelWeightSize_;  // size associated with modelWeightPtr_
  void *modelMemPtr_;       // model memory buffer (ownership not visible from this header)
  void *modelWeightPtr_;    // model weight buffer (ownership not visible from this header)
  bool loadFlag_;           // model load flag
  aclmdlDesc *modelDesc_;   // model description handle, see CreateDesc()/DestroyDesc()
  aclmdlDataset *input_;    // input dataset, see CreateInput()/DestroyInput()
  aclmdlDataset *output_;   // output dataset, see CreateOutput()/DestroyOutput()
};
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "../inc/utils.h"
|
||||
#include "acl/acl.h"
|
||||
|
||||
/**
|
||||
* SampleProcess
|
||||
*/
|
||||
class SampleProcess {
 public:
  /**
   * @brief Constructor
   */
  SampleProcess();

  /**
   * @brief Destructor
   */
  ~SampleProcess();

  /**
   * @brief init resource
   * @return result (SUCCESS or FAILED)
   */
  Result InitResource();

  /**
   * @brief sample process
   * @param [in] om_path: presumably the path of the offline model (.om) file -- TODO confirm
   * @param [in] input_folder: presumably the directory holding the input data -- TODO confirm
   * @return result (SUCCESS or FAILED)
   */
  Result Process(char *om_path, char *input_folder);

  /**
   * @brief collect files into `files`
   * @param [in] path: location to inspect
   * @param [out] files: receives the collected entries
   * NOTE(review): whether the search is recursive and what exactly is stored
   * (names or full paths) is not visible from this header.
   */
  void GetAllFiles(std::string path, std::vector<std::string> *files);

 private:
  /**
   * @brief release the resources held by this object
   */
  void DestroyResource();

  int32_t deviceId_;      // device identifier
  aclrtContext context_;  // ACL runtime context handle
  aclrtStream stream_;    // ACL runtime stream handle
};
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
// <cstdio> provides fprintf/stdout used by the logging macros and <cstdint>
// provides uint32_t; both were previously only reachable through transitive
// includes, which the standard does not guarantee.
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <string>

// printf-style logging helpers. Every level writes to stdout and appends a
// newline; `fmt` must be a string literal because it is concatenated with the
// level prefix at compile time.
#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args)

// Status code returned by the sample's processing functions.
typedef enum Result {
  SUCCESS = 0,
  FAILED = 1
} Result;

/**
 * Utils
 */
class Utils {
 public:
  /**
   * @brief create device buffer of file
   * @param [in] fileName: file name
   * @param [out] fileSize: size of file
   * @return device buffer of file
   */
  static void *GetDeviceBufferOfFile(std::string fileName, uint32_t *fileSize);

  /**
   * @brief create buffer of file
   * @param [in] fileName: file name
   * @param [out] fileSize: size of file
   * @return buffer of file
   */
  static void *ReadBinFile(std::string fileName, uint32_t *fileSize);
};
|
|
@ -0,0 +1,89 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""do post training quantization for Ascend310"""
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
from amct_mindspore.quantize_tool import create_quant_config
|
||||
from amct_mindspore.quantize_tool import quantize_model
|
||||
from amct_mindspore.quantize_tool import save_model
|
||||
import mindspore
|
||||
from mindspore import Tensor, context
|
||||
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||
|
||||
|
||||
def quant_ssd(network, dataset, input_data):
    """
    Run post training quantization and export the result in AIR format.

    Args:
        network: the origin network for inference.
        dataset: the data used for calibration; its "image" column is fed to the network.
        input_data: the data used for constructing network. The shape and format of input data should be the same as
            actual data for inference.
    """
    quant_config = "./config.json"

    # Create the quant config json file from the simplified config definition.
    create_quant_config(quant_config, network, input_data, config_defination="./config.cfg")

    # Let the tool modify the network; the returned network is used for calibration.
    calibration_network = quantize_model(quant_config, network, input_data)
    calibration_network.set_train(False)

    # Forward the calibration data once to do activation calibration.
    for batch in dataset.create_dict_iterator(num_epochs=1):
        calibration_network(batch["image"])

    # Export the AIR file.
    save_model("results/ssd_quant", calibration_network, input_data)
    print("[INFO] the quantized AIR file has been stored at: \n {}".format("results/ssd_quant.air"))
|
||||
|
||||
|
||||
def run_export():
    """
    Prepare input parameters needed for exporting quantization model.

    Builds the SSD inference network with decoder, loads the checkpoint given by
    `config.checkpoint_path`, creates a random input of shape
    [1, 3] + config.img_shape and uses one batch of the evaluation dataset for
    calibration before calling `quant_ssd`.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=config.device_id)

    # -1 means the number of default boxes has to be derived from the feature map sizes.
    if hasattr(config, "num_ssd_boxes") and config.num_ssd_boxes == -1:
        num = 0
        h, w = config.img_shape
        # The per-layer strides live in `config.steps`; the loop previously indexed
        # a non-existent `config.step`, which raised AttributeError.
        for i, step in enumerate(config.steps):
            num += (h // step) * (w // step) * config.num_default[i]
        config.num_ssd_boxes = num

    net = SSD300(ssd_mobilenet_v2(), config, is_training=False)
    net = SsdInferWithDecoder(net, Tensor(default_boxes), config)
    param_dict = load_checkpoint(config.checkpoint_path)
    net.init_parameters_data()
    load_param_into_net(net, param_dict)
    net.set_train(False)

    batch_size = 1
    input_shp = [batch_size, 3] + config.img_shape
    inputs = Tensor(np.random.uniform(-1.0, 1.0, size=input_shp), mindspore.float32)

    mindrecord_file = create_mindrecord("coco", "ssd_eval.mindrecord", False)
    datasets = create_ssd_dataset(mindrecord_file, batch_size=batch_size, repeat_num=1, is_training=False,
                                  use_multiprocessing=False)
    # A single batch is used for activation calibration.
    ds = datasets.take(1)
    quant_ssd(net, ds, inputs)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The project package `src` is expected one directory above this script, so
    # the search path is extended before the project imports are resolved.
    sys.path.append("..")
    from src.ssd import SSD300, SsdInferWithDecoder, ssd_mobilenet_v2
    from src.model_utils.config import config
    from src.dataset import create_ssd_dataset, create_mindrecord
    from src.box_utils import default_boxes

    run_export()
|
|
@ -0,0 +1,108 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
# All five positional arguments are mandatory; print the usage and abort otherwise.
if (( $# < 5 )); then
  echo "Usage: bash run_quant_infer.sh [AIR_PATH] [IMAGE_DATA] [IMAGE_ID] [IMAGE_SHAPE] [ANN_FILE]"
  echo "Example: bash run_quant_infer.sh ./ssd_quant.air ./00_image_data ./01_image_id ./02_image_shape \
./instances_val2017.json"
  exit 1
fi
|
||||
|
||||
# Print the absolute form of the path given as $1.
# Absolute paths are echoed unchanged; relative ones are resolved against $PWD.
# All expansions are quoted so that paths containing whitespace stay intact.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    realpath -m "$PWD/$1"
  fi
}

# Absolute locations of the five script arguments.
model=$(get_real_path "$1")
data_path=$(get_real_path "$2")
id_path=$(get_real_path "$3")
shape_path=$(get_real_path "$4")
ann_path=$(get_real_path "$5")

echo "air name: $model"
echo "image data path: $data_path"
echo "image id path: $id_path"
echo "image shape path: $shape_path"
echo "annotation path: $ann_path"
|
||||
|
||||
# Ascend tool chain environment. Two installation layouts are supported:
# one with an `ascend-toolkit` directory below ASCEND_HOME and one without.
export ASCEND_HOME=/usr/local/Ascend/
if [[ -d "${ASCEND_HOME}/ascend-toolkit" ]]; then
  # Layout with ascend-toolkit/latest.
  export PATH="${ASCEND_HOME}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:${ASCEND_HOME}/ascend-toolkit/latest/atc/bin:${PATH}"
  export LD_LIBRARY_PATH="/usr/local/lib:${ASCEND_HOME}/ascend-toolkit/latest/atc/lib64:${ASCEND_HOME}/ascend-toolkit/latest/fwkacllib/lib64:${ASCEND_HOME}/driver/lib64:${ASCEND_HOME}/add-ons:${LD_LIBRARY_PATH}"
  export TBE_IMPL_PATH="${ASCEND_HOME}/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe"
  export PYTHONPATH="${TBE_IMPL_PATH}:${ASCEND_HOME}/ascend-toolkit/latest/fwkacllib/python/site-packages:${PYTHONPATH}"
  export ASCEND_OPP_PATH="${ASCEND_HOME}/ascend-toolkit/latest/opp"
else
  # Layout with the components directly below ASCEND_HOME.
  export PATH="${ASCEND_HOME}/fwkacllib/ccec_compiler/bin:${ASCEND_HOME}/fwkacllib/bin:${PATH}"
  export LD_LIBRARY_PATH="/usr/local/lib:${ASCEND_HOME}/fwkacllib/lib64:${ASCEND_HOME}/acllib/lib64:${ASCEND_HOME}/driver/lib64:${LD_LIBRARY_PATH}"
  export TBE_IMPL_PATH="${ASCEND_HOME}/opp/op_impl/built-in/ai_core/tbe"
  export PYTHONPATH="${TBE_IMPL_PATH}:${PYTHONPATH}"
  export ASCEND_OPP_PATH="${ASCEND_HOME}/opp"
fi
|
||||
|
||||
# Convert the AIR model referenced by $model into ssd_quant.om with atc.
# The atc output goes to atc.log. $model is quoted so that model paths
# containing whitespace are passed as a single argument.
function air_to_om()
{
  atc --input_format=NCHW --framework=1 --model="$model" --output=ssd_quant --soc_version=Ascend310 &> atc.log
}
|
||||
|
||||
# Build the inference application; the build output is collected in build.log.
compile_app() {
  bash ./src/build.sh > build.log 2>&1
}
|
||||
|
||||
# Run the compiled application on the data below $data_path.
# The ./result directory is recreated from scratch on every run and the
# application output is collected in infer.log. $data_path is quoted so that
# paths containing whitespace are passed as a single argument.
function infer()
{
  if [ -d result ]; then
    rm -rf ./result
  fi
  mkdir result
  ./out/main ./ssd_quant.om "$data_path" &> infer.log
}
|
||||
|
||||
# Evaluate the inference results in ./result with acc.py; the report goes to acc.log.
# The path arguments are quoted so that paths containing whitespace are passed intact.
function cal_acc()
{
  python3.7 ./acc.py --result_path=./result --image_id="$id_path" --image_shape="$shape_path" --ann_file="$ann_path" &> acc.log
}
|
||||
|
||||
# Pipeline: model conversion -> application build -> inference -> accuracy.
# Each stage aborts the script with exit code 1 when it fails.
echo "start atc================================================"
if ! air_to_om; then
  echo "air to om code failed"
  exit 1
fi

echo "start compile============================================"
if ! compile_app; then
  echo "compile app code failed"
  exit 1
fi

echo "start infer=============================================="
if ! infer; then
  echo " execute inference failed"
  exit 1
fi

echo "start calculate acc======================================"
if ! cal_acc; then
  echo "calculate accuracy failed"
  exit 1
fi
|
|
@ -0,0 +1,43 @@
|
|||
# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
|
||||
|
||||
# CMake lowest version requirement
cmake_minimum_required(VERSION 3.5.1)
# project information
project(InferClassification)
# Check environment variable
if(NOT DEFINED ENV{ASCEND_HOME})
    message(FATAL_ERROR "please define environment variable:ASCEND_HOME")
endif()

# Compile options
# add_definitions() is used instead of add_compile_definitions(): the latter
# only exists since CMake 3.12, while this project declares 3.5.1 as minimum.
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")

# Skip build rpath
set(CMAKE_SKIP_BUILD_RPATH True)

# Set output directory
set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SRC_ROOT}/../out)

# Set include directory and library directory
set(FWKACL_LIB_DIR $ENV{ASCEND_HOME}/fwkacllib)
set(ACL_LIB_DIR $ENV{ASCEND_HOME}/acllib)
set(ATLAS_ACL_LIB_DIR $ENV{ASCEND_HOME}/ascend-toolkit/latest/acllib)

# Header path
include_directories(${ACL_LIB_DIR}/include/)
include_directories(${FWKACL_LIB_DIR}/include/)
include_directories(${ATLAS_ACL_LIB_DIR}/include/)
include_directories(${PROJECT_SRC_ROOT}/../inc)

# add host lib path
link_directories(${ACL_LIB_DIR} ${FWKACL_LIB_DIR})
# libascendcl.so is searched in every supported installation layout
find_library(acl libascendcl.so ${ACL_LIB_DIR}/lib64 ${FWKACL_LIB_DIR}/lib64 ${ATLAS_ACL_LIB_DIR}/lib64)

add_executable(main utils.cpp
        sample_process.cpp
        model_process.cpp
        main.cpp)

target_link_libraries(main ${acl} gflags pthread)
|
|
@ -0,0 +1 @@
|
|||
{}
|
|
@ -0,0 +1,55 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
# Absolute directory of this script. "$0" is quoted so that script locations
# containing whitespace are handled; $(...) replaces the legacy backticks.
path_cur=$(cd "$(dirname "$0")" || exit; pwd)
|
||||
|
||||
# Recreate directory $1 from scratch and change into it.
# Exits the script when no directory is given or the directory cannot be entered.
function preparePath() {
    # An empty argument would make "cd" jump to $HOME, so refuse it up front.
    if [ -z "$1" ]; then
        echo "preparePath: missing directory argument"
        exit 1
    fi
    rm -rf -- "$1"
    mkdir -p -- "$1"
    cd -- "$1" || exit
}
|
||||
|
||||
# Configure and build the sample with CMake inside "$path_cur/build".
# ARCH_PATTERN defaults to "acllib"; a user-supplied value gets "/acllib" appended.
# Returns the exit status of cmake when configuration fails, otherwise that of make.
function buildA300() {
    if [ -z "${ARCH_PATTERN}" ]; then
        # set ARCH_PATTERN to acllib when it was not specified by user
        export ARCH_PATTERN=acllib
        echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}"
    else
        echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user, reset it to ${ARCH_PATTERN}/acllib"
        export ARCH_PATTERN=${ARCH_PATTERN}/acllib
    fi

    path_build="$path_cur/build"
    preparePath "$path_build"
    # Only run make when the configure step succeeded.
    cmake .. && make -j
    ret=$?
    # Leave the build directory again regardless of the outcome.
    cd ..
    return ${ret}
}
|
||||
|
||||
# Fall back to ascend-toolkit/latest when the caller did not export ASCEND_VERSION.
if [ -z "${ASCEND_VERSION}" ]; then
    export ASCEND_VERSION=ascend-toolkit/latest
    echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}"
else
    echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user"
fi

# Build the sample; any non-zero build status makes the script exit with 1.
buildA300 || exit 1
|
|
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include "../inc/sample_process.h"
|
||||
#include "../inc/utils.h"
|
||||
bool g_is_device = false;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc != 3) {
|
||||
ERROR_LOG("usage:./main path_of_om path_of_inputFolder");
|
||||
return FAILED;
|
||||
}
|
||||
SampleProcess processSample;
|
||||
Result ret = processSample.InitResource();
|
||||
if (ret != SUCCESS) {
|
||||
ERROR_LOG("sample init resource failed");
|
||||
return FAILED;
|
||||
}
|
||||
|
||||
ret = processSample.Process(argv[1], argv[2]);
|
||||
if (ret != SUCCESS) {
|
||||
ERROR_LOG("sample process failed");
|
||||
return FAILED;
|
||||
}
|
||||
|
||||
INFO_LOG("execute sample success");
|
||||
return SUCCESS;
|
||||
}
|
|
@ -0,0 +1,339 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "../inc/model_process.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "../inc/utils.h"
|
||||
extern bool g_is_device;
|
||||
|
||||
/** Constructs a ModelProcess with no model loaded and no buffers or datasets attached. */
ModelProcess::ModelProcess()
    : modelId_(0),
      modelMemSize_(0),
      modelWeightSize_(0),
      modelMemPtr_(nullptr),
      modelWeightPtr_(nullptr),
      loadFlag_(false),
      modelDesc_(nullptr),
      input_(nullptr),
      output_(nullptr) {}
|
||||
|
||||
/** Releases everything the object may still hold: model, description, input and output datasets. */
ModelProcess::~ModelProcess() {
    Unload();         // unloads the model and frees its buffers if one is loaded
    DestroyDesc();    // no-op when the description is already gone
    DestroyInput();   // no-op when no input dataset exists
    DestroyOutput();  // no-op when no output dataset exists
}
|
||||
|
||||
/**
 * Loads a model file into memory buffers allocated by this object.
 *
 * Queries the working-memory and weight sizes for the file, allocates both
 * buffers with aclrtMalloc and passes them to aclmdlLoadFromFileWithMem. On
 * success modelId_ identifies the loaded model and loadFlag_ is set.
 *
 * @param modelPath path of the model file to load.
 * @return SUCCESS on success; FAILED when a model is already loaded, the path
 *         is null, or an ACL call fails. Buffers allocated by this call are
 *         released before a failure is reported.
 */
Result ModelProcess::LoadModelFromFileWithMem(const char *modelPath) {
    if (loadFlag_) {
        ERROR_LOG("has already loaded a model");
        return FAILED;
    }
    if (modelPath == nullptr) {
        ERROR_LOG("model path is null");
        return FAILED;
    }

    // Unload() returns early while loadFlag_ is false, so anything allocated
    // here has to be released locally when a later step fails.
    auto releaseBuffers = [this]() {
        if (modelMemPtr_ != nullptr) {
            (void)aclrtFree(modelMemPtr_);
            modelMemPtr_ = nullptr;
        }
        if (modelWeightPtr_ != nullptr) {
            (void)aclrtFree(modelWeightPtr_);
            modelWeightPtr_ = nullptr;
        }
        modelMemSize_ = 0;
        modelWeightSize_ = 0;
    };

    aclError ret = aclmdlQuerySize(modelPath, &modelMemSize_, &modelWeightSize_);
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("query model failed, model file is %s", modelPath);
        return FAILED;
    }

    ret = aclrtMalloc(&modelMemPtr_, modelMemSize_, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("malloc buffer for mem failed, require size is %zu", modelMemSize_);
        releaseBuffers();
        return FAILED;
    }

    ret = aclrtMalloc(&modelWeightPtr_, modelWeightSize_, ACL_MEM_MALLOC_HUGE_FIRST);
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("malloc buffer for weight failed, require size is %zu", modelWeightSize_);
        releaseBuffers();
        return FAILED;
    }

    ret = aclmdlLoadFromFileWithMem(modelPath, &modelId_, modelMemPtr_,
                                    modelMemSize_, modelWeightPtr_, modelWeightSize_);
    if (ret != ACL_ERROR_NONE) {
        ERROR_LOG("load model from file failed, model file is %s", modelPath);
        releaseBuffers();
        return FAILED;
    }

    loadFlag_ = true;
    INFO_LOG("load model %s success", modelPath);
    return SUCCESS;
}
|
||||
|
||||
/**
 * Creates the model description object and fills it for the model identified by modelId_.
 *
 * @return SUCCESS when the description was created and populated, FAILED otherwise.
 */
Result ModelProcess::CreateDesc() {
    modelDesc_ = aclmdlCreateDesc();
    if (modelDesc_ == nullptr) {
        ERROR_LOG("create model description failed");
        return FAILED;
    }

    const aclError status = aclmdlGetDesc(modelDesc_, modelId_);
    if (status == ACL_ERROR_NONE) {
        INFO_LOG("create model description success");
        return SUCCESS;
    }

    ERROR_LOG("get model description failed");
    return FAILED;
}
|
||||
|
||||
void ModelProcess::DestroyDesc() {
|
||||
if (modelDesc_ != nullptr) {
|
||||
(void)aclmdlDestroyDesc(modelDesc_);
|
||||
modelDesc_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the input dataset with a single data buffer wrapping the caller's memory.
 *
 * @param inputDataBuffer address of the input data; it is wrapped, not copied.
 * @param bufferSize      size of that memory in bytes.
 * @return SUCCESS when the dataset holds the buffer, FAILED otherwise.
 */
Result ModelProcess::CreateInput(void *inputDataBuffer, size_t bufferSize) {
    input_ = aclmdlCreateDataset();
    if (input_ == nullptr) {
        ERROR_LOG("can't create dataset, create input failed");
        return FAILED;
    }

    aclDataBuffer *wrapped = aclCreateDataBuffer(inputDataBuffer, bufferSize);
    if (wrapped == nullptr) {
        ERROR_LOG("can't create data buffer, create input failed");
        return FAILED;
    }

    const aclError status = aclmdlAddDatasetBuffer(input_, wrapped);
    if (status == ACL_ERROR_NONE) {
        return SUCCESS;
    }

    // The dataset did not take the buffer, so it has to be destroyed here.
    ERROR_LOG("add input dataset buffer failed");
    aclDestroyDataBuffer(wrapped);
    wrapped = nullptr;
    return FAILED;
}
|
||||
|
||||
void ModelProcess::DestroyInput() {
|
||||
if (input_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) {
|
||||
aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(input_, i);
|
||||
aclDestroyDataBuffer(dataBuffer);
|
||||
}
|
||||
aclmdlDestroyDataset(input_);
|
||||
input_ = nullptr;
|
||||
}
|
||||
|
||||
/**
 * Builds the output dataset: one buffer per model output, sized from the
 * model description and allocated with aclrtMalloc.
 *
 * Requires CreateDesc() to have succeeded. Buffers already added to the
 * dataset before a failure stay attached to output_ and are released by
 * DestroyOutput().
 *
 * @return SUCCESS when every output buffer was created and added, FAILED otherwise.
 */
Result ModelProcess::CreateOutput() {
    if (modelDesc_ == nullptr) {
        ERROR_LOG("no model description, create output failed");
        return FAILED;
    }

    output_ = aclmdlCreateDataset();
    if (output_ == nullptr) {
        ERROR_LOG("can't create dataset, create output failed");
        return FAILED;
    }

    const size_t outputSize = aclmdlGetNumOutputs(modelDesc_);
    for (size_t i = 0; i < outputSize; ++i) {
        const size_t buffer_size = aclmdlGetOutputSizeByIndex(modelDesc_, i);

        void *outputBuffer = nullptr;
        aclError ret = aclrtMalloc(&outputBuffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
        if (ret != ACL_ERROR_NONE) {
            ERROR_LOG("can't malloc buffer, size is %zu, create output failed", buffer_size);
            return FAILED;
        }

        // aclCreateDataBuffer reports failure through a null return, not an error code.
        aclDataBuffer *outputData = aclCreateDataBuffer(outputBuffer, buffer_size);
        if (outputData == nullptr) {
            ERROR_LOG("can't create data buffer, create output failed");
            (void)aclrtFree(outputBuffer);
            return FAILED;
        }

        ret = aclmdlAddDatasetBuffer(output_, outputData);
        if (ret != ACL_ERROR_NONE) {
            ERROR_LOG("can't add data buffer, create output failed");
            (void)aclrtFree(outputBuffer);
            (void)aclDestroyDataBuffer(outputData);
            return FAILED;
        }
    }

    INFO_LOG("create model output success");
    return SUCCESS;
}
|
||||
|
||||
void ModelProcess::DumpModelOutputResult(char *output_name) {
|
||||
size_t outputNum = aclmdlGetDatasetNumBuffers(output_);
|
||||
|
||||
for (size_t i = 0; i < outputNum; ++i) {
|
||||
std::stringstream ss;
|
||||
ss << "result/" << output_name << "_output_" << i << ".bin";
|
||||
std::string outputFileName = ss.str();
|
||||
FILE *outputFile = fopen(outputFileName.c_str(), "wb");
|
||||
if (outputFile) {
|
||||
aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
|
||||
void* data = aclGetDataBufferAddr(dataBuffer);
|
||||
uint32_t len = aclGetDataBufferSizeV2(dataBuffer);
|
||||
|
||||
void* outHostData = NULL;
|
||||
aclError ret = ACL_ERROR_NONE;
|
||||
if (!g_is_device) {
|
||||
ret = aclrtMallocHost(&outHostData, len);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("aclrtMallocHost failed, ret[%d]", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = aclrtMemcpy(outHostData, len, data, len, ACL_MEMCPY_DEVICE_TO_HOST);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("aclrtMemcpy failed, ret[%d]", ret);
|
||||
(void)aclrtFreeHost(outHostData);
|
||||
return;
|
||||
}
|
||||
|
||||
fwrite(outHostData, len, sizeof(char), outputFile);
|
||||
|
||||
ret = aclrtFreeHost(outHostData);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("aclrtFreeHost failed, ret[%d]", ret);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
fwrite(data, len, sizeof(char), outputFile);
|
||||
}
|
||||
fclose(outputFile);
|
||||
outputFile = nullptr;
|
||||
} else {
|
||||
ERROR_LOG("create output file [%s] failed", outputFileName.c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
INFO_LOG("dump data success");
|
||||
return;
|
||||
}
|
||||
|
||||
void ModelProcess::OutputModelResult() {
|
||||
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) {
|
||||
aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
|
||||
void* data = aclGetDataBufferAddr(dataBuffer);
|
||||
uint32_t len = aclGetDataBufferSizeV2(dataBuffer);
|
||||
|
||||
void *outHostData = NULL;
|
||||
aclError ret = ACL_ERROR_NONE;
|
||||
float *outData = NULL;
|
||||
if (!g_is_device) {
|
||||
ret = aclrtMallocHost(&outHostData, len);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("aclrtMallocHost failed, ret[%d]", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = aclrtMemcpy(outHostData, len, data, len, ACL_MEMCPY_DEVICE_TO_HOST);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("aclrtMemcpy failed, ret[%d]", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
outData = reinterpret_cast<float*>(outHostData);
|
||||
} else {
|
||||
outData = reinterpret_cast<float*>(data);
|
||||
}
|
||||
std::map<float, unsigned int, std::greater<float> > resultMap;
|
||||
for (unsigned int j = 0; j < len / sizeof(float); ++j) {
|
||||
resultMap[*outData] = j;
|
||||
outData++;
|
||||
}
|
||||
|
||||
int cnt = 0;
|
||||
for (auto it = resultMap.begin(); it != resultMap.end(); ++it) {
|
||||
// print top 5
|
||||
if (++cnt > 5) {
|
||||
break;
|
||||
}
|
||||
|
||||
INFO_LOG("top %d: index[%d] value[%lf]", cnt, it->second, it->first);
|
||||
}
|
||||
if (!g_is_device) {
|
||||
ret = aclrtFreeHost(outHostData);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("aclrtFreeHost failed, ret[%d]", ret);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INFO_LOG("output data success");
|
||||
return;
|
||||
}
|
||||
|
||||
void ModelProcess::DestroyOutput() {
|
||||
if (output_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) {
|
||||
aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
|
||||
void* data = aclGetDataBufferAddr(dataBuffer);
|
||||
(void)aclrtFree(data);
|
||||
(void)aclDestroyDataBuffer(dataBuffer);
|
||||
}
|
||||
|
||||
(void)aclmdlDestroyDataset(output_);
|
||||
output_ = nullptr;
|
||||
}
|
||||
|
||||
/**
 * Runs the loaded model on the current input dataset, writing into the output dataset.
 *
 * @return SUCCESS when aclmdlExecute reports no error, FAILED otherwise.
 */
Result ModelProcess::Execute() {
    const aclError status = aclmdlExecute(modelId_, input_, output_);
    if (status == ACL_ERROR_NONE) {
        INFO_LOG("model execute success");
        return SUCCESS;
    }

    ERROR_LOG("execute model failed, modelId is %u", modelId_);
    return FAILED;
}
|
||||
|
||||
void ModelProcess::Unload() {
|
||||
if (!loadFlag_) {
|
||||
WARN_LOG("no model had been loaded, unload failed");
|
||||
return;
|
||||
}
|
||||
|
||||
aclError ret = aclmdlUnload(modelId_);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("unload model failed, modelId is %u", modelId_);
|
||||
}
|
||||
|
||||
if (modelDesc_ != nullptr) {
|
||||
(void)aclmdlDestroyDesc(modelDesc_);
|
||||
modelDesc_ = nullptr;
|
||||
}
|
||||
|
||||
if (modelMemPtr_ != nullptr) {
|
||||
aclrtFree(modelMemPtr_);
|
||||
modelMemPtr_ = nullptr;
|
||||
modelMemSize_ = 0;
|
||||
}
|
||||
|
||||
if (modelWeightPtr_ != nullptr) {
|
||||
aclrtFree(modelWeightPtr_);
|
||||
modelWeightPtr_ = nullptr;
|
||||
modelWeightSize_ = 0;
|
||||
}
|
||||
|
||||
loadFlag_ = false;
|
||||
INFO_LOG("unload model success, modelId is %u", modelId_);
|
||||
}
|
|
@ -0,0 +1,256 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "../inc/sample_process.h"
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "../inc/model_process.h"
|
||||
#include "acl/acl.h"
|
||||
#include "../inc/utils.h"
|
||||
extern bool g_is_device;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
/** Constructs a SampleProcess bound to device 0 with no context or stream yet. */
SampleProcess::SampleProcess()
    : deviceId_(0),
      context_(nullptr),
      stream_(nullptr) {}
|
||||
|
||||
/** Tears down the stream, context, device and ACL runtime via DestroyResource(). */
SampleProcess::~SampleProcess() {
    DestroyResource();
}
|
||||
|
||||
/**
 * Brings up the ACL runtime: aclInit with "./src/acl.json", device selection,
 * context and stream creation, and run-mode detection.
 *
 * On success g_is_device records whether the run mode is ACL_DEVICE.
 *
 * @return SUCCESS when every step succeeds, FAILED at the first failing step.
 */
Result SampleProcess::InitResource() {
    // Step 1: initialize ACL from the JSON config (path is relative to the working directory).
    const char *configFile = "./src/acl.json";
    if (aclInit(configFile) != ACL_ERROR_NONE) {
        ERROR_LOG("acl init failed");
        return FAILED;
    }
    INFO_LOG("acl init success");

    // Step 2: open the device.
    if (aclrtSetDevice(deviceId_) != ACL_ERROR_NONE) {
        ERROR_LOG("acl open device %d failed", deviceId_);
        return FAILED;
    }
    INFO_LOG("open device %d success", deviceId_);

    // Step 3: create the context on that device.
    if (aclrtCreateContext(&context_, deviceId_) != ACL_ERROR_NONE) {
        ERROR_LOG("acl create context failed");
        return FAILED;
    }
    INFO_LOG("create context success");

    // Step 4: create the stream.
    if (aclrtCreateStream(&stream_) != ACL_ERROR_NONE) {
        ERROR_LOG("acl create stream failed");
        return FAILED;
    }
    INFO_LOG("create stream success");

    // Step 5: find out whether the process runs on the device or on the host.
    aclrtRunMode mode;
    if (aclrtGetRunMode(&mode) != ACL_ERROR_NONE) {
        ERROR_LOG("acl get run mode failed");
        return FAILED;
    }
    g_is_device = (mode == ACL_DEVICE);
    INFO_LOG("get run mode success");
    return SUCCESS;
}
|
||||
|
||||
void SampleProcess::GetAllFiles(std::string path, std::vector<string> *files) {
|
||||
DIR *pDir = NULL;
|
||||
struct dirent* ptr;
|
||||
if (!(pDir = opendir(path.c_str()))) {
|
||||
return;
|
||||
}
|
||||
while ((ptr = readdir(pDir)) != 0) {
|
||||
if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
|
||||
files->push_back(path + "/" + ptr->d_name);
|
||||
}
|
||||
}
|
||||
closedir(pDir);
|
||||
}
|
||||
|
||||
/**
 * Loads the model, runs inference on every file in the input folder, dumps
 * each result and logs timing statistics.
 *
 * For each file the data is placed in a device buffer, wrapped as the model
 * input, executed, and the outputs are dumped under the file's base name
 * (directory and last extension removed). Three per-sample averages are
 * logged: pure execution time, file-read-to-execution-end time, and total
 * wall time including model loading.
 *
 * @param om_path      path of the model file.
 * @param input_folder directory whose entries are used as input files.
 * @return SUCCESS when every file was processed (or the folder was empty),
 *         FAILED at the first failing step.
 */
Result SampleProcess::Process(char *om_path, char *input_folder) {
    // Current wall-clock time in milliseconds.
    const auto nowMs = []() {
        const double microsPerSecond = 1000000;
        const double microsPerMilli = 1000;
        struct timeval tv = {0, 0};
        gettimeofday(&tv, nullptr);
        return (tv.tv_sec * microsPerSecond + tv.tv_usec) / microsPerMilli;
    };

    const double startTimeMs_global = nowMs();

    // model init
    ModelProcess processModel;
    const char *omModelPath = om_path;

    Result ret = processModel.LoadModelFromFileWithMem(omModelPath);
    if (ret != SUCCESS) {
        ERROR_LOG("execute LoadModelFromFileWithMem failed");
        return FAILED;
    }

    ret = processModel.CreateDesc();
    if (ret != SUCCESS) {
        ERROR_LOG("execute CreateDesc failed");
        return FAILED;
    }

    ret = processModel.CreateOutput();
    if (ret != SUCCESS) {
        ERROR_LOG("execute CreateOutput failed");
        return FAILED;
    }

    std::vector<string> testFile;
    GetAllFiles(input_folder, &testFile);

    if (testFile.size() == 0) {
        WARN_LOG("no input data under folder");
    }

    double model_cost_time = 0.0;
    double edge_to_edge_model_cost_time = 0.0;

    // loop begin
    for (size_t index = 0; index < testFile.size(); ++index) {
        INFO_LOG("start to process file:%s", testFile[index].c_str());

        // Edge-to-edge timing starts before the file is read.
        const double timeval_init = nowMs();

        uint32_t devBufferSize = 0;
        void *picDevBuffer = Utils::GetDeviceBufferOfFile(testFile[index], &devBufferSize);
        if (picDevBuffer == nullptr) {
            ERROR_LOG("get pic device buffer failed,index is %zu", index);
            return FAILED;
        }

        ret = processModel.CreateInput(picDevBuffer, devBufferSize);
        if (ret != SUCCESS) {
            ERROR_LOG("execute CreateInput failed");
            aclrtFree(picDevBuffer);
            return FAILED;
        }

        const double startTimeMs = nowMs();
        ret = processModel.Execute();
        const double endTimeMs = nowMs();

        const double cost_time = endTimeMs - startTimeMs;
        INFO_LOG("model infer time: %lf ms", cost_time);
        model_cost_time += cost_time;
        edge_to_edge_model_cost_time += endTimeMs - timeval_init;

        if (ret != SUCCESS) {
            ERROR_LOG("execute inference failed");
            aclrtFree(picDevBuffer);
            return FAILED;
        }

        // Base name: strip the directory part and the last extension.
        // When there is no '/', npos + 1 wraps to 0 and the whole string is kept.
        const size_t slashPos = testFile[index].find_last_of('/');
        const std::string name = testFile[index].substr(slashPos + 1);
        std::string outputname = name.substr(0, name.rfind("."));

        // dump output result to file in the current directory
        processModel.DumpModelOutputResult(const_cast<char *>(outputname.c_str()));

        // release model input buffer
        aclrtFree(picDevBuffer);
        processModel.DestroyInput();
    }
    // loop end

    const double test_file_size = testFile.size();
    INFO_LOG("infer dataset size:%lf", test_file_size);

    // Per-sample averages are undefined without samples; skip them instead of dividing by zero.
    if (testFile.empty()) {
        return SUCCESS;
    }

    const double endTimeMs_global = nowMs();
    const double whole_cost_time = (endTimeMs_global - startTimeMs_global) / test_file_size;

    model_cost_time /= test_file_size;
    INFO_LOG("model cost time per sample: %lf ms", model_cost_time);
    edge_to_edge_model_cost_time /= test_file_size;
    INFO_LOG("edge-to-edge model cost time per sample:%lf ms", edge_to_edge_model_cost_time);
    INFO_LOG("whole cost time per sample: %lf ms", whole_cost_time);

    return SUCCESS;
}
|
||||
|
||||
void SampleProcess::DestroyResource() {
|
||||
aclError ret;
|
||||
if (stream_ != nullptr) {
|
||||
ret = aclrtDestroyStream(stream_);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("destroy stream failed");
|
||||
}
|
||||
stream_ = nullptr;
|
||||
}
|
||||
INFO_LOG("end to destroy stream");
|
||||
|
||||
if (context_ != nullptr) {
|
||||
ret = aclrtDestroyContext(context_);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("destroy context failed");
|
||||
}
|
||||
context_ = nullptr;
|
||||
}
|
||||
INFO_LOG("end to destroy context");
|
||||
|
||||
ret = aclrtResetDevice(deviceId_);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("reset device failed");
|
||||
}
|
||||
INFO_LOG("end to reset device is %d", deviceId_);
|
||||
|
||||
ret = aclFinalize();
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("finalize acl failed");
|
||||
}
|
||||
INFO_LOG("end to finalize acl");
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "../inc/utils.h"
|
||||
#include <sys/stat.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include "acl/acl.h"
|
||||
|
||||
extern bool g_is_device;
|
||||
|
||||
void* Utils::ReadBinFile(std::string fileName, uint32_t *fileSize) {
|
||||
struct stat sBuf;
|
||||
int fileStatus = stat(fileName.data(), &sBuf);
|
||||
if (fileStatus == -1) {
|
||||
ERROR_LOG("failed to get file");
|
||||
return nullptr;
|
||||
}
|
||||
if (S_ISREG(sBuf.st_mode) == 0) {
|
||||
ERROR_LOG("%s is not a file, please enter a file", fileName.c_str());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::ifstream binFile(fileName, std::ifstream::binary);
|
||||
if (binFile.is_open() == false) {
|
||||
ERROR_LOG("open file %s failed", fileName.c_str());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
binFile.seekg(0, binFile.end);
|
||||
uint32_t binFileBufferLen = binFile.tellg();
|
||||
if (binFileBufferLen == 0) {
|
||||
ERROR_LOG("binfile is empty, filename is %s", fileName.c_str());
|
||||
binFile.close();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
binFile.seekg(0, binFile.beg);
|
||||
|
||||
void* binFileBufferData = nullptr;
|
||||
aclError ret = ACL_ERROR_NONE;
|
||||
if (!g_is_device) {
|
||||
ret = aclrtMallocHost(&binFileBufferData, binFileBufferLen);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("malloc for binFileBufferData failed");
|
||||
binFile.close();
|
||||
return nullptr;
|
||||
}
|
||||
if (binFileBufferData == nullptr) {
|
||||
ERROR_LOG("malloc binFileBufferData failed");
|
||||
binFile.close();
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
ret = aclrtMalloc(&binFileBufferData, binFileBufferLen, ACL_MEM_MALLOC_NORMAL_ONLY);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("malloc device buffer failed. size is %u", binFileBufferLen);
|
||||
binFile.close();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
binFile.read(static_cast<char *>(binFileBufferData), binFileBufferLen);
|
||||
binFile.close();
|
||||
*fileSize = binFileBufferLen;
|
||||
return binFileBufferData;
|
||||
}
|
||||
|
||||
void* Utils::GetDeviceBufferOfFile(std::string fileName, uint32_t *fileSize) {
|
||||
uint32_t inputHostBuffSize = 0;
|
||||
void* inputHostBuff = Utils::ReadBinFile(fileName, &inputHostBuffSize);
|
||||
if (inputHostBuff == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!g_is_device) {
|
||||
void *inBufferDev = nullptr;
|
||||
uint32_t inBufferSize = inputHostBuffSize;
|
||||
aclError ret = aclrtMalloc(&inBufferDev, inBufferSize, ACL_MEM_MALLOC_NORMAL_ONLY);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("malloc device buffer failed. size is %u", inBufferSize);
|
||||
aclrtFreeHost(inputHostBuff);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ret = aclrtMemcpy(inBufferDev, inBufferSize, inputHostBuff, inputHostBuffSize, ACL_MEMCPY_HOST_TO_DEVICE);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
ERROR_LOG("memcpy failed. device buffer size is %u, input host buffer size is %u",
|
||||
inBufferSize, inputHostBuffSize);
|
||||
aclrtFree(inBufferDev);
|
||||
aclrtFreeHost(inputHostBuff);
|
||||
return nullptr;
|
||||
}
|
||||
aclrtFreeHost(inputHostBuff);
|
||||
*fileSize = inBufferSize;
|
||||
return inBufferDev;
|
||||
} else {
|
||||
*fileSize = inputHostBuffSize;
|
||||
return inputHostBuff;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue