!34113 [MS][LITE] Add Hi35xx keep-original-output support

Merge pull request !34113 from gongdaguo1/add_hi3516_origin_output
i-robot 2022-05-10 06:13:47 +00:00 committed by Gitee
commit 16f9b62536
17 changed files with 781 additions and 196 deletions

View File

@@ -161,6 +161,14 @@ class MS_API Model {
Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
/// \brief Inference model with pre-filled inputs; results are written to the model's output tensors.
///
/// \param[in] before CallBack before predict.
/// \param[in] after CallBack after predict.
///
/// \return Status.
Status Predict(const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
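A minimal usage sketch of the new overload (hypothetical caller code; assumes the model is already built and its input buffers were written in place via GetInputs()):

  auto inputs = model.GetInputs();
  // ... fill inputs[i].MutableData() directly ...
  auto status = model.Predict();  // before/after callbacks default to nullptr
  if (status != mindspore::kSuccess) {
    // handle the error
  }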
/// \brief Train model by step.
///
/// \param[in] before CallBack before predict.

View File

@@ -0,0 +1,47 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NNIE_SRC_CUSTOM_ALLOCATOR_H_
#define NNIE_SRC_CUSTOM_ALLOCATOR_H_
#include <memory>
#include <string>
#include <vector>
#include <mutex>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <atomic>
#include "include/api/allocator.h"
#include "include/hi_type.h"
namespace mindspore {
namespace nnie {
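// A do-nothing allocator used to tag tensors whose buffers are MMZ memory
// owned by NNIEManager: Malloc/Free are no-ops and the ref-count hooks are
// stubs, so the runtime neither allocates nor frees these buffers itself.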
class CustomAllocator : public Allocator {
public:
CustomAllocator() {}
~CustomAllocator() override {}
void *Malloc(size_t size) override { return nullptr; }
void Free(void *ptr) override {}
int RefCount(void *ptr) override { return 1; }
int SetRefCount(void *ptr, int ref_count) override { return ref_count; }
int DecRefCount(void *ptr, int ref_count) override { return 1; }
int IncRefCount(void *ptr, int ref_count) override { return 1; }
};
} // namespace nnie
} // namespace mindspore
#endif // NNIE_SRC_CUSTOM_ALLOCATOR_H_

View File

@@ -20,9 +20,6 @@
#include "schema/model_generated.h"
#include "include/registry/register_kernel.h"
#include "include/errorcode.h"
#include "src/nnie_manager.h"
#include "src/nnie_print.h"
#include "src/nnie_cfg_parser.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
@@ -30,29 +27,29 @@ using mindspore::schema::PrimitiveType_Custom;
namespace mindspore {
namespace nnie {
bool CustomCPUKernel::load_model_ = false;
static std::shared_ptr<Allocator> kCustomAllocator = std::make_shared<nnie::CustomAllocator>();
int CustomCPUKernel::run_seg_ = 0;
bool CustomCPUKernel::roi_used_ = false;
int CustomCPUKernel::Prepare() {
if (!load_model_) {
Flags flags;
if (flags.Init(*this) != RET_OK) {
if (manager_ == nullptr) {
LOGE("manager_ is nullptr.");
return RET_ERROR;
}
if (!manager_->GetLoadModel()) {
if (manager_->GetFlags()->Init(*this) != RET_OK) {
LOGE("Nnie config init fail.");
return RET_ERROR;
}
if (nnie::NNIEManager::GetInstance()->CfgInit(flags.max_roi_num_, flags.time_step_, flags.core_ids_) != RET_OK) {
if (manager_->CfgInit(*manager_->GetFlags(), manager_->GetMaxSegId()) != RET_OK) {
LOGE("Nnie init cfg fail.");
return RET_ERROR;
}
if (nnie::NNIEManager::GetInstance()->Init(reinterpret_cast<char *>(inputs_[inputs_.size() - 1].MutableData()),
static_cast<int>(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) {
if (manager_->Init(reinterpret_cast<char *>(inputs_[inputs_.size() - 1].MutableData()),
static_cast<int>(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) {
LOGI("Load WK Model Fail.");
return RET_OK;
}
load_model_ = true;
manager_->SetLoadModel(true);
}
outputs_shapes_.resize(outputs_.size());
for (size_t i = 0; i < outputs_.size(); i++) {
@@ -62,38 +59,51 @@ int CustomCPUKernel::Prepare() {
}
int CustomCPUKernel::ReSize() {
if (load_model_) {
nnie::NNIEManager::GetInstance()->Release();
load_model_ = false;
if (manager_->GetLoadModel() && seg_id() == 0) {
manager_->Release(true);
manager_->SetLoadModel(false);
}
return Prepare();
}
int CustomCPUKernel::Execute() {
if (!load_model_) {
if (!manager_->GetLoadModel()) {
LOGE("WK Model is not load.");
return RET_ERROR;
}
run_seg_ = seg_id_;
Flags *flags = manager_->GetFlags();
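// Keep-original-output mode: bind the user tensors' own memory to the
// first segment's inputs and the last segment's outputs, so NNIE reads and
// writes them in place instead of copying through aligned staging buffers.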
if (flags->keep_origin_output_) {
if (seg_id_ == 0) {
if (manager_->LoadInputs(&inputs_, kCustomAllocator) != RET_OK) {
LOGE("Unable to find the physical address corresponding to the input tensor.");
return RET_ERROR;
}
}
if (seg_id_ == manager_->GetMaxSegId()) {
if (manager_->LoadOutputs(&outputs_, kCustomAllocator) != RET_OK) {
LOGE("Unable to find the physical address corresponding to the output tensor.");
return RET_ERROR;
}
}
}
if (nnie::NNIEManager::GetInstance()->FillData(&inputs_, run_seg_) != RET_OK) {
if (manager_->FillData(&inputs_, seg_id_) != RET_OK) {
LOGE("Fail Fill Data.");
return RET_ERROR;
}
if (nnie::NNIEManager::GetInstance()->Run(&outputs_, run_seg_, outputs_shapes_) != RET_OK) {
if (manager_->Run(&outputs_, seg_id_, outputs_shapes_) != RET_OK) {
LOGE("Fail WK Run.");
return RET_ERROR;
}
run_seg_++;
return RET_OK;
}
CustomCPUKernel::~CustomCPUKernel() {
if (load_model_) {
nnie::NNIEManager::GetInstance()->Release();
load_model_ = false;
if (manager_->GetLoadModel()) {
manager_->Release(false);
manager_->SetLoadModel(false);
}
}
@@ -159,7 +169,13 @@ std::shared_ptr<mindspore::kernel::Kernel> CustomCreateKernel(const std::vector<
forward_bbox = true;
}
}
auto kernel = std::make_shared<CustomCPUKernel>(ndims, forward_bbox, inputs, outputs, primitive, ctx);
auto model_buf = static_cast<const void *>(inputs[inputs.size() - 1].Data().get());
auto manager = nnie::NNIEManager::GetInstance(model_buf);
if (manager == nullptr) {
LOGE("malloc NNIEManager failed.");
return nullptr;
}
auto kernel = std::make_shared<CustomCPUKernel>(manager, ndims, forward_bbox, inputs, outputs, primitive, ctx);
if (kernel == nullptr) {
LOGE("new custom kernel is nullptr");
return nullptr;

View File

@@ -19,10 +19,16 @@
#include <vector>
#include <string>
#include <memory>
#include "include/schema/model_generated.h"
#include "include/context.h"
#include "include/api/kernel.h"
#include "src/custom_infer.h"
#include "include/hi_type.h"
#include "src/nnie_cfg_parser.h"
#include "src/nnie_manager.h"
#include "src/nnie_print.h"
#include "src/custom_allocator.h"
using mindspore::kernel::Kernel;
using mindspore::tensor::MSTensor;
@@ -30,12 +36,14 @@ namespace mindspore {
namespace nnie {
class CustomCPUKernel : public Kernel {
public:
CustomCPUKernel(int seg_id, bool forward_bbox, const std::vector<MSTensor> &inputs,
CustomCPUKernel(nnie::NNIEManager *manager, int seg_id, bool forward_bbox, const std::vector<MSTensor> &inputs,
const std::vector<MSTensor> &outputs, const mindspore::schema::Primitive *primitive,
const mindspore::Context *ctx)
: Kernel(inputs, outputs, primitive, ctx), seg_id_(seg_id), forward_bbox_(forward_bbox) {
if (forward_bbox) {
roi_used_ = true;
: Kernel(inputs, outputs, primitive, ctx), manager_(manager), seg_id_(seg_id), forward_bbox_(forward_bbox) {
if (manager_ == nullptr) {
LOGE("manager_ is nullptr.");
} else {
manager_->SetMaxSegId(seg_id);
}
}
@@ -54,9 +62,7 @@ class CustomCPUKernel : public Kernel {
void set_forward_bbox(bool flag) { forward_bbox_ = flag; }
private:
static bool load_model_;
static int run_seg_;
static bool roi_used_;
nnie::NNIEManager *manager_ = nullptr;
int seg_id_ = 0;
bool forward_bbox_ = false;
std::vector<std::vector<int64_t>> outputs_shapes_;

View File

@@ -34,6 +34,7 @@ namespace {
constexpr auto kTimeStep = "TimeStep";
constexpr auto kMazRoiNum = "MaxROINum";
constexpr auto kCoreIds = "CoreIds";
constexpr auto kKeepOrigin = "KeepOriginalOutput";
constexpr auto DELIM = ",";
constexpr int MAX_CORE_ID = 7;
} // namespace
@@ -46,25 +47,49 @@ void PrintInvalidChar(const std::string &key, const std::string &dat) {
LOGE(message.c_str());
}
int Flags::Init(const kernel::Kernel &kernel) {
auto nnie_arg = kernel.GetConfig("nnie");
if (nnie_arg.find(kTimeStep) != nnie_arg.end()) {
if (IsValidUnsignedNum(nnie_arg.at(kTimeStep)) == true) {
this->time_step_ = stoi(nnie_arg.at(kTimeStep));
int Flags::ParserInt(const std::map<std::string, std::string> &nnie_arg, const std::string key, int *val) {
auto iter = nnie_arg.find(key);
if (iter != nnie_arg.end()) {
auto str = iter->second;
if (IsValidUnsignedNum(str) == true) {
*val = stoi(str);
} else {
PrintInvalidChar(kTimeStep, nnie_arg.at(kTimeStep));
PrintInvalidChar(key, str);
return RET_ERROR;
}
}
return RET_OK;
}
if (nnie_arg.find(kMazRoiNum) != nnie_arg.end()) {
if (IsValidUnsignedNum(nnie_arg.at(kMazRoiNum)) == true) {
this->max_roi_num_ = stoi(nnie_arg.at(kMazRoiNum));
int Flags::ParserBool(const std::map<std::string, std::string> &nnie_arg, const std::string key, bool *val) {
auto iter = nnie_arg.find(key);
if (iter != nnie_arg.end()) {
auto str = iter->second;
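// Note: substring match, so any value containing "on" (checked first) or
// "off" is accepted, not just the exact strings.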
if (str.find("on") != std::string::npos) {
*val = true;
} else if (str.find("off") != std::string::npos) {
*val = false;
} else {
PrintInvalidChar(kMazRoiNum, nnie_arg.at(kMazRoiNum));
PrintInvalidChar(key, str);
return RET_ERROR;
}
}
return RET_OK;
}
int Flags::Init(const kernel::Kernel &kernel) {
auto nnie_arg = kernel.GetConfig("nnie");
if (ParserInt(nnie_arg, kTimeStep, &this->time_step_) != RET_OK) {
return RET_ERROR;
}
if (ParserInt(nnie_arg, kMazRoiNum, &this->max_roi_num_) != RET_OK) {
return RET_ERROR;
}
if (ParserBool(nnie_arg, kKeepOrigin, &this->keep_origin_output_) != RET_OK) {
return RET_ERROR;
}
if (nnie_arg.find(kCoreIds) != nnie_arg.end()) {
auto ids = nnie_arg.at(kCoreIds);
@@ -85,6 +110,7 @@ int Flags::Init(const kernel::Kernel &kernel) {
return RET_ERROR;
}
}
return RET_OK;
}
} // namespace nnie
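For reference, a sketch of the nnie section in a benchmark config file with the new switch enabled (key names come from the constants above; the other values shown are the documented defaults, and the file itself is hypothetical):

  [nnie]
  TimeStep=1
  MaxROINum=300
  CoreIds=0
  KeepOriginalOutput=on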

View File

@@ -16,10 +16,17 @@
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
#include <vector>
#include <map>
#include <string>
#include "include/api/kernel.h"
#include "include/hi_type.h"
namespace mindspore {
namespace nnie {
typedef struct {
HI_U64 phy_;
HI_U32 size_;
} MEM_ITEM;
/**
* Flags is a config container.
* Member objects:
@@ -39,6 +46,11 @@ class Flags {
int time_step_{1};
int max_roi_num_{300};
std::vector<int> core_ids_{0};
bool keep_origin_output_{false};
private:
int ParserInt(const std::map<std::string, std::string> &nnie_arg, const std::string key, int *val);
int ParserBool(const std::map<std::string, std::string> &nnie_arg, const std::string key, bool *val);
};
} // namespace nnie
} // namespace mindspore

View File

@@ -26,6 +26,7 @@ using mindspore::lite::RET_OK;
namespace mindspore {
namespace nnie {
constexpr int kSleepUs = 100;
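// Row-count factor for the YUV blob sizes computed in GetBlobSize below:
// YVU420SP blobs hold channel * height / kCompressionWidth rows, and
// YVU422SP blobs hold height * kCompressionWidth rows.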
constexpr int kCompressionWidth = 2;
static void NnieParamRelease(NnieParam *nnie_param) {
if (nnie_param == nullptr) {
return;
@@ -141,7 +142,8 @@ static void FillForwardInfo(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
}
static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32 total_step, SVP_BLOB_S blob[],
HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool *mem_alloc = nullptr) {
HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool malloc_allow,
bool *mem_alloc = nullptr) {
HI_U32 i = 0;
HI_U32 size;
HI_U32 stride;
@@ -173,7 +175,9 @@ static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32
blob_size[i] = 0;
}
}
*total_size += blob_size[i];
if (malloc_allow) {
*total_size += blob_size[i];
}
blob[i].u32Stride = stride;
}
}
@@ -208,18 +212,71 @@ static int GetTaskAndBlobBufSize(NnieCfg *nnie_cfg, NnieParam *nnie_param, HI_U3
j);
}
}
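// When pass_align16_io_ is set, seg-0 input blobs and last-seg output
// blobs will reuse user tensor memory, so their sizes are excluded from
// the internal buffer allocation.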
bool malloc_allow = (!nnie_cfg->pass_align16_io_) || i != 0;
GetBlobMemSize(&(nnie_param->model_->astSeg[i].astSrcNode[0]), nnie_param->model_->astSeg[i].u16SrcNum, total_step,
&(nnie_param->seg_data_[i].src_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].src_size_[0]),
&(nnie_param->mem_cfg_.seg_[i].src_node_[0]));
malloc_allow, &(nnie_param->mem_cfg_.seg_[i].src_node_[0]));
malloc_allow = (!nnie_cfg->pass_align16_io_) || (i + 1) != nnie_param->model_->u32NetSegNum;
GetBlobMemSize(&(nnie_param->model_->astSeg[i].astDstNode[0]), nnie_param->model_->astSeg[i].u16DstNum, total_step,
&(nnie_param->seg_data_[i].dst_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].dst_size_[0]));
&(nnie_param->seg_data_[i].dst_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].dst_size_[0]),
malloc_allow);
}
return RET_OK;
}
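// Assigns physical/virtual addresses to every blob. With pass_align16_io
// (the KeepOriginalOutput path) enabled, the first segment's input blobs
// and the last segment's output blobs are left unbound (addresses zeroed);
// NNIEManager::LoadInputs/LoadOutputs attach user tensor memory to them later.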
static int NnieSetBlobAddr(HI_U64 *phy_addr, HI_U8 **vir_addr, NnieParam *nnie_param, NnieBlobSize *blob_size,
bool pass_align16_io) {
HI_U32 i, j;
for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
if ((!pass_align16_io) || i != 0) {
for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
if (j != 0) {
*phy_addr += blob_size[i].src_size_[j - 1];
*vir_addr += blob_size[i].src_size_[j - 1];
}
if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) {
if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param,
&(nnie_param->seg_data_[i].src_[j]))) {
LOGE("ConnectNnieInnerNode failed! ");
return RET_ERROR;
}
} else {
nnie_param->seg_data_[i].src_[j].u64PhyAddr = *phy_addr;
nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)*vir_addr;
}
}
*phy_addr += blob_size[i].src_size_[j - 1];
*vir_addr += blob_size[i].src_size_[j - 1];
} else {
for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
nnie_param->seg_data_[i].src_[j].u64PhyAddr = 0;
nnie_param->seg_data_[i].src_[j].u64VirAddr = 0;
}
}
if ((!pass_align16_io) || (i + 1) != nnie_param->model_->u32NetSegNum) {
for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
if (j != 0) {
*phy_addr += blob_size[i].dst_size_[j - 1];
*vir_addr += blob_size[i].dst_size_[j - 1];
}
nnie_param->seg_data_[i].dst_[j].u64PhyAddr = *phy_addr;
nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)*vir_addr;
}
*phy_addr += blob_size[i].dst_size_[j - 1];
*vir_addr += blob_size[i].dst_size_[j - 1];
} else {
for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {  // dst nodes, not src
nnie_param->seg_data_[i].dst_[j].u64PhyAddr = 0;
nnie_param->seg_data_[i].dst_[j].u64VirAddr = 0;
}
}
}
return RET_OK;
}
static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
HI_U32 i, j;
HI_U32 i;
HI_U32 total_size = 0, total_task_buf_size = 0, tmp_buf_size_ = 0;
HI_S32 ret = HI_SUCCESS;
HI_U32 off_set = 0;
@@ -288,36 +345,9 @@ static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
phy_addr = phy_addr + total_task_buf_size + tmp_buf_size_;
vir_addr = vir_addr + total_task_buf_size + tmp_buf_size_;
for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
if (j != 0) {
phy_addr += blob_size[i].src_size_[j - 1];
vir_addr += blob_size[i].src_size_[j - 1];
}
if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) {
if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param,
&(nnie_param->seg_data_[i].src_[j]))) {
LOGE("ConnectNnieInnerNode failed! ");
return RET_ERROR;
}
} else {
nnie_param->seg_data_[i].src_[j].u64PhyAddr = phy_addr;
nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
}
}
phy_addr += blob_size[i].src_size_[j - 1];
vir_addr += blob_size[i].src_size_[j - 1];
for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
if (j != 0) {
phy_addr += blob_size[i].dst_size_[j - 1];
vir_addr += blob_size[i].dst_size_[j - 1];
}
nnie_param->seg_data_[i].dst_[j].u64PhyAddr = phy_addr;
nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
}
phy_addr += blob_size[i].dst_size_[j - 1];
vir_addr += blob_size[i].dst_size_[j - 1];
if (NnieSetBlobAddr(&phy_addr, &vir_addr, nnie_param, blob_size, nnie_cfg->pass_align16_io_) != RET_OK) {
LOGE("SetBlobAddr failed!");
return RET_ERROR;
}
if (has_roi) {
nnie_param->rpn_bbox_.u64PhyAddr = phy_addr;
@@ -536,70 +566,108 @@ int FillByFloat(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_F
return RET_OK;
}
static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
int size) {
HI_U32 i, j, n, ret;
HI_U32 height, width, channel, stride, dim;
HI_U8 *input_addr_u8 = nullptr;
HI_S32 *input_addr_s32 = nullptr;
HI_U32 *step_addr_u32 = nullptr;
HI_FLOAT *float_src_data = nullptr;
HI_U8 *u8_src_data = nullptr;
static int NnieFillSrcDataSeq(NnieCfg *nnie_cfg, SVP_SRC_BLOB_S *blob, HI_U32 input_size) {
HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
HI_U32 dim = blob->unShape.stSeq.u32Dim;
HI_U32 stride = blob->u32Stride;
HI_U32 i, j, n;
HI_U32 total_step_num = 0;
HI_U32 input_size = 1;
SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_];
for (n = 0; n < (HI_U32)size; n++) {
input_size *= shape[n];
}
input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
u8_src_data = reinterpret_cast<unsigned char *>(nnie_cfg->data_ptr_);
if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
dim = blob->unShape.stSeq.u32Dim;
stride = blob->u32Stride;
HI_U8 *input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
HI_S32 *input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
HI_FLOAT *float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
for (n = 0; n < blob->u32Num; n++) {
for (n = 0; n < blob->u32Num; n++) {
total_step_num += *(step_addr_u32 + n);
}
if (input_size != total_step_num * dim) {
LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim);
return RET_ERROR;
}
for (n = 0; n < blob->u32Num; n++) {
for (i = 0; i < *(step_addr_u32 + n); i++) {
for (j = 0; j < dim; j++) {
input_addr_s32[j] = (float_src_data[j] * NNIE_QUANT_BASE);
}
input_addr_u8 += stride;
input_addr_s32 = reinterpret_cast<HI_S32 *>(input_addr_u8);
float_src_data += dim;
}
}
NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride);
return RET_OK;
}
HI_U32 GetBlobSize(const SVP_SRC_BLOB_S &blob) {
if (SVP_BLOB_TYPE_SEQ_S32 == blob.enType) {
HI_U32 stride = blob.u32Stride;
HI_U32 total_step_num = 0;
HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob.unShape.stSeq.u64VirAddrStep);
size_t n;
for (n = 0; n < blob.u32Num; n++) {
total_step_num += *(step_addr_u32 + n);
}
return total_step_num * stride;
}
if (input_size != total_step_num * dim) {
LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim);
return RET_ERROR;
}
for (n = 0; n < blob->u32Num; n++) {
for (i = 0; i < *(step_addr_u32 + n); i++) {
for (j = 0; j < dim; j++) {
input_addr_s32[j] = (float_src_data[j] * NNIE_QUANT_BASE);
}
input_addr_u8 += stride;
input_addr_s32 = reinterpret_cast<HI_S32 *>(input_addr_u8);
float_src_data += dim;
}
}
NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride);
HI_U32 stride = blob.u32Stride;
HI_U32 height = blob.unShape.stWhc.u32Height;
HI_U32 channel = blob.unShape.stWhc.u32Chn;
if (SVP_BLOB_TYPE_YVU420SP == blob.enType) {
return blob.u32Num * static_cast<HI_U32>(channel * height / kCompressionWidth) * stride;
} else if (SVP_BLOB_TYPE_YVU422SP == blob.enType) {
return blob.u32Num * height * kCompressionWidth * stride;
} else {
height = blob->unShape.stWhc.u32Height;
width = blob->unShape.stWhc.u32Width;
channel = blob->unShape.stWhc.u32Chn;
stride = blob->u32Stride;
if (SVP_BLOB_TYPE_YVU420SP == blob->enType) {
ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast<HI_U32>(channel * height / 2), width, stride,
u8_src_data, input_addr_u8);
} else if (SVP_BLOB_TYPE_YVU422SP == blob->enType) {
ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8);
} else {
if (SVP_BLOB_TYPE_U8 == blob->enType) {
ret =
FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8);
return blob.u32Num * channel * height * stride;
}
}
static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
int size) {
HI_U32 i, ret;
HI_U32 input_size = 1;
SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_];
for (i = 0; i < (HI_U32)size; i++) {
input_size *= shape[i];
}
if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
return NnieFillSrcDataSeq(nnie_cfg, blob, input_size);
} else {
HI_U8 *input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
HI_S32 *input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
HI_FLOAT *float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
HI_U8 *u8_src_data = reinterpret_cast<unsigned char *>(nnie_cfg->data_ptr_);
HI_U32 height = blob->unShape.stWhc.u32Height;
HI_U32 width = blob->unShape.stWhc.u32Width;
HI_U32 channel = blob->unShape.stWhc.u32Chn;
HI_U32 stride = blob->u32Stride;
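// Fast path for keep-original-output: the tensor already points at the
// blob's memory, so at most an in-place float -> S32 quantization is
// needed; otherwise fall through to the copying fill paths below.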
if (input_addr_u8 == u8_src_data) {
if (blob->enType == SVP_BLOB_TYPE_S32) {
for (i = 0; i < input_size; i++) {
input_addr_s32[i] = float_src_data[i] * NNIE_QUANT_BASE;
}
} else {
ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32,
input_addr_u8);
LOGI("\ninput no memcpy");
}
} else {
if (SVP_BLOB_TYPE_YVU420SP == blob->enType) {
ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast<HI_U32>(channel * height / 2), width, stride,
u8_src_data, input_addr_u8);
} else if (SVP_BLOB_TYPE_YVU422SP == blob->enType) {
ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8);
} else {
if (SVP_BLOB_TYPE_U8 == blob->enType) {
ret =
FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8);
} else {
ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32,
input_addr_u8);
}
}
if (ret != RET_OK) {
return ret;
}
}
if (ret != RET_OK) {
return ret;
}
NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr),
blob->u32Num * channel * height * stride);
@@ -608,42 +676,32 @@ static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataInd
return RET_OK;
}
static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
int size) {
static int NnieGetDstDataSEQ(SVP_SRC_BLOB_S *blob, HI_U32 input_num, NnieDataIndex *input_data_idx,
HI_FLOAT *float_dst_data) {
HI_U32 i, j, n;
HI_U32 height, width, channel, stride, dim;
HI_U8 *output_addr_u8 = nullptr;
HI_S32 *output_addr_s32 = nullptr;
HI_U32 *step_addr_u32 = nullptr;
HI_FLOAT *float_dst_data = nullptr;
HI_U32 dim = blob->unShape.stSeq.u32Dim;
HI_U32 stride = blob->u32Stride;
HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
HI_U32 total_step_num = 0;
HI_U32 input_num = 1;
SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_];
for (n = 0; n < (HI_U32)size; n++) {
input_num *= shape[n];
}
HI_U8 *output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
HI_S32 *output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
LOGE("Nnie output type error");
for (n = 0; n < blob->u32Num; n++) {
total_step_num += *(step_addr_u32 + n);
}
if (input_num != total_step_num * dim) {
LOGE("input shape");
return RET_ERROR;
}
output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
float_dst_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
dim = blob->unShape.stSeq.u32Dim;
stride = blob->u32Stride;
step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
if (input_data_idx->seg_idx_ == input_data_idx->max_seg_id_) {
for (n = 0; n < blob->u32Num; n++) {
total_step_num += *(step_addr_u32 + n);
}
if (input_num != total_step_num * dim) {
LOGE("input shape");
return RET_ERROR;
for (i = 0; i < *(step_addr_u32 + n); i++) {
memcpy(float_dst_data, output_addr_u8, dim * sizeof(float));
float_dst_data += dim;
output_addr_u8 += stride;
}
}
} else {
for (n = 0; n < blob->u32Num; n++) {
for (i = 0; i < *(step_addr_u32 + n); i++) {
for (j = 0; j < dim; j++) {
@@ -654,23 +712,67 @@ static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataInde
float_dst_data += dim;
}
}
} else {
height = blob->unShape.stWhc.u32Height;
width = blob->unShape.stWhc.u32Width;
channel = blob->unShape.stWhc.u32Chn;
stride = blob->u32Stride;
if (input_num != height * channel * width * blob->u32Num) {
LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num);
}
return RET_OK;
}
static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
int size) {
SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_];
HI_U32 input_num = 1;
for (HI_U32 i = 0; i < (HI_U32)size; i++) {
input_num *= shape[i];
}
if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
LOGE("Nnie output type error");
return RET_ERROR;
}
HI_FLOAT *float_dst_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
if (NnieGetDstDataSEQ(blob, input_num, input_data_idx, float_dst_data) != RET_OK) {
LOGE("NnieGetDstDataSEQ error.");
return RET_ERROR;
}
for (n = 0; n < blob->u32Num; n++) {
for (i = 0; i < channel * height; i++) {
for (j = 0; j < width; j++) {
float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE;
} else {
HI_U8 *output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
HI_S32 *output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
if (float_dst_data == reinterpret_cast<float *>(output_addr_s32)) {
if (input_data_idx->seg_idx_ != input_data_idx->max_seg_id_) {
for (HI_U32 i = 0; i < input_num; i++) {
float_dst_data[i] = (HI_FLOAT)output_addr_s32[i] / NNIE_QUANT_BASE;
}
} else {
LOGI("\noutput no memcpy");
}
} else {
HI_U32 height = blob->unShape.stWhc.u32Height;
HI_U32 width = blob->unShape.stWhc.u32Width;
HI_U32 channel = blob->unShape.stWhc.u32Chn;
HI_U32 stride = blob->u32Stride;
if (input_num != height * channel * width * blob->u32Num) {
LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num);
return RET_ERROR;
}
if (input_data_idx->seg_idx_ == input_data_idx->max_seg_id_) {
if (nnie_cfg->pass_align16_io_) {
memcpy(float_dst_data, output_addr_u8, blob->u32Num * channel * height * stride);
} else {
for (HI_U32 i = 0; i < (blob->u32Num * channel * height); i++) {
memcpy(float_dst_data, output_addr_u8, width * sizeof(float));
float_dst_data += width;
output_addr_u8 += stride;
}
}
} else {
for (HI_U32 n = 0; n < blob->u32Num; n++) {
for (HI_U32 i = 0; i < channel * height; i++) {
for (HI_U32 j = 0; j < width; j++) {
float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE;
}
output_addr_u8 += stride;
output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
float_dst_data += width;
}
}
output_addr_u8 += stride;
output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
float_dst_data += width;
}
}
}

View File

@@ -19,12 +19,14 @@
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include "include/api/types.h"
#include "include/mpi_vb.h"
#include "include/hi_comm_svp.h"
#include "include/hi_nnie.h"
#include "include/mpi_nnie.h"
#include "include/ir/dtype/type_id.h"
#include "src/nnie_cfg_parser.h"
namespace mindspore {
namespace nnie {
@@ -70,9 +72,11 @@ typedef struct {
SVP_NNIE_FORWARD_CTRL_S forward_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
SVP_NNIE_FORWARD_WITHBBOX_CTRL_S forward_with_bbox_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
NNIEMemCfg mem_cfg_;
bool get_mem_strong;
} NnieParam;
typedef struct {
bool pass_align16_io_;
HI_VOID *data_ptr_;
HI_U32 max_input_num_;
HI_U32 max_roi_num_;
@@ -85,6 +89,7 @@ typedef struct {
typedef struct {
HI_U32 seg_idx_;
HI_U32 node_idx_;
HI_U32 max_seg_id_;
} NnieDataIndex;
typedef struct {
@@ -110,6 +115,8 @@ int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box);
int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size, int id);
int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index);
HI_U32 GetBlobSize(const SVP_SRC_BLOB_S &blob);
} // namespace nnie
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_

View File

@@ -14,6 +14,9 @@
* limitations under the License.
*/
#include <cstring>
#include <string>
#include <map>
#include <memory>
#include "src/nnie_manager.h"
#include "src/nnie_common.h"
#include "src/nnie_print.h"
@@ -24,26 +27,29 @@ using mindspore::lite::RET_OK;
namespace mindspore {
namespace nnie {
constexpr int kUINT16_MAX = 65535;
constexpr int kNumInput2 = 2;
int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector<int> &core_id) {
int NNIEManager::CfgInit(const Flags &flags, int max_seg_id) {
memset(&nnie_cfg_, 0, sizeof(NnieRunCfg));
nnie_cfg_.cfg_.max_roi_num_ = max_roi_num;
nnie_cfg_.cfg_.step_ = step;
if (core_id.size() == 1) {
nnie_cfg_.cfg_.pass_align16_io_ = flags.keep_origin_output_;
nnie_cfg_.param_.get_mem_strong = false;
nnie_cfg_.run_idx_.max_seg_id_ = flags.keep_origin_output_ ? max_seg_id + 1 : kUINT16_MAX;
nnie_cfg_.cfg_.max_roi_num_ = flags.max_roi_num_;
nnie_cfg_.cfg_.step_ = flags.time_step_;
if (flags.core_ids_.size() == 1) {
for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM; i++) {
if (core_id[0] < SVP_NNIE_ID_BUTT) {
nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[0];
if (flags.core_ids_[0] < SVP_NNIE_ID_BUTT) {
nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)flags.core_ids_[0];
} else {
LOGE("nnie core num toobig.\n");
return RET_ERROR;
}
}
}
for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < core_id.size(); i++) {
if (core_id[i] < SVP_NNIE_ID_BUTT) {
nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[i];
for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < flags.core_ids_.size(); i++) {
if (flags.core_ids_[i] < SVP_NNIE_ID_BUTT) {
nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)flags.core_ids_[i];
} else {
LOGE("nnie core num toobig.\n");
return RET_ERROR;
@@ -51,6 +57,108 @@ int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector<int> &core
}
return RET_OK;
}
int NNIEManager::MallocBlobData(SVP_SRC_BLOB_S *blob, mindspore::MSTensor *tensor, HI_U32 blob_size) {
auto ret = NnieMemMallocCached(tensor->Name().c_str(), nullptr, reinterpret_cast<HI_U64 *>(&blob->u64PhyAddr),
reinterpret_cast<void **>(&blob->u64VirAddr), blob_size);
if (HI_SUCCESS != ret) {
LOGE("Error,MallocBlobData failed!");
return RET_ERROR;
}
blobs_.push_back(blob);
tensors_.push_back(tensor);
return RET_OK;
}
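// Binds a blob to tensor memory. If the tensor has no data yet, an MMZ
// buffer is allocated once, cached in blobs_/tensors_, and handed to the
// tensor through the custom allocator. If the tensor already has data, its
// physical address is resolved so NNIE can use the buffer in place, with a
// fresh MMZ allocation as the fallback.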
int NNIEManager::SetBlobAddr(SVP_SRC_BLOB_S *blob, HI_U64 virt, mindspore::MSTensor *tensor,
std::shared_ptr<Allocator> allocator) {
HI_U32 blob_size = GetBlobSize(*blob);
if (virt == 0) {
auto iter = std::find(blobs_.begin(), blobs_.end(), blob);
if (iter == blobs_.end()) {
if (MallocBlobData(blob, tensor, blob_size) != RET_OK) {
LOGE("Failed to malloc.");
return RET_ERROR;
}
}
tensor->SetAllocator(allocator);
tensor->SetData(reinterpret_cast<void *>(blob->u64VirAddr));
LOGI("\nSet %s allocator!", tensor->Name().c_str());
} else {
auto ret = NnieGetVirMemInfo(virt, &blob->u64PhyAddr);
if (ret == HI_SUCCESS) {
blob->u64VirAddr = virt;
LOGI("Get physical address %llu.", blob->u64PhyAddr);
} else {
auto iter = std::find(blobs_.begin(), blobs_.end(), blob);
if (iter == blobs_.end()) {
if (MallocBlobData(blob, tensor, blob_size) != RET_OK) {
LOGE("Error, tensor data pointer is not MMZ memory, failed to malloc.");
return RET_ERROR;
}
}
}
}
return RET_OK;
}
int NNIEManager::LoadInputs(std::vector<mindspore::MSTensor> *inputs, std::shared_ptr<Allocator> allocator) {
size_t input_size = inputs->size();
if ((input_size < kNumInput2) || (input_size - 1) != nnie_cfg_.param_.model_->astSeg[0].u16SrcNum) {
LOGE("Input Size Err!");
return RET_ERROR;
}
for (size_t i = 0; i < nnie_cfg_.param_.model_->astSeg[0].u16SrcNum; i++) {
size_t j = GetFillIndex(*inputs, input_size - 1, nnie_cfg_.param_.model_->astSeg[0].astSrcNode[i].szName);
if (j == (input_size - 1)) {
j = i;
LOGI("input tensor name(%s) can't match wk node name(%s).", (*inputs)[j].Name().c_str(),
nnie_cfg_.param_.model_->astSeg[0].astSrcNode[i].szName);
}
HI_U64 virt = (HI_U64)(HI_UL)((*inputs)[j].Data().get());
auto blob = &nnie_cfg_.param_.seg_data_[0].src_[i];
if (SetBlobAddr(blob, virt, &(*inputs)[j], allocator) != RET_OK) {
return RET_ERROR;
}
}
return RET_OK;
}
int NNIEManager::LoadOutputs(std::vector<mindspore::MSTensor> *outputs, std::shared_ptr<Allocator> allocator) {
int output_size = outputs->size();
HI_U32 seg_id = nnie_cfg_.model_.model_.u32NetSegNum - 1;
if (output_size != nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum) {
LOGE("seg%d: %d output tensors are required, but there are %d outputs.", nnie_cfg_.run_idx_.seg_idx_ - 1,
nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum, output_size);
return RET_ERROR;
}
if (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI) {
LOGE("Unsupported use PassAlign16InOutput!");
return RET_ERROR;
}
for (int i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum; i++) {
int j = GetFillIndex(*outputs, output_size, nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
if (j == output_size) {
j = i;
LOGI("output tensor name(%s) can't match wk node name(%s).", (*outputs)[j].Name().c_str(),
nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
}
SVP_SRC_BLOB_S *blob = &nnie_cfg_.param_.seg_data_[seg_id].dst_[i];
if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
LOGE("Nnie output type error");
return RET_ERROR;
}
HI_U64 virt = (HI_U64)(HI_UL)((*outputs)[j].Data().get());
if (SetBlobAddr(blob, virt, &(*outputs)[j], allocator) != RET_OK) {
return RET_ERROR;
}
}
return RET_OK;
}
void NNIEManager::SetInputNum(int max_input_num) { nnie_cfg_.cfg_.max_input_num_ = max_input_num; }
int NNIEManager::Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs) {
@@ -80,17 +188,33 @@ int NNIEManager::Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg
return RET_OK;
}
void NNIEManager::Release() { NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_); }
void NNIEManager::Release(bool resize_flag) {
for (auto &blob : blobs_) {
NNIE_MEM_FREE(blob->u64PhyAddr, blob->u64VirAddr);
blob->u64VirAddr = 0;
blob->u64PhyAddr = 0;
}
blobs_.clear();
if (resize_flag) {
for (auto &tensor : tensors_) {
tensor->SetData(nullptr);
tensor->SetAllocator(nullptr);
}
}
tensors_.clear();
NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
}
int NNIEManager::GetOutputData(std::vector<mindspore::MSTensor> *outputs,
const std::vector<std::vector<int64_t>> &outputs_shape, bool run_box) {
int i, j, output_size = outputs->size();
int output_size = outputs->size();
if (output_size != nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum) {
LOGE("seg%d: %d output tensors are required, but there are %d outputs.", nnie_cfg_.run_idx_.seg_idx_ - 1,
nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum, output_size);
return RET_ERROR;
}
int i, j;
if (run_box) {
for (i = 0; i < output_size; i++) {
auto input_data_type = (*outputs)[i].DataType();
@@ -164,6 +288,132 @@ int NNIEManager::FillRoiPooling(mindspore::MSTensor *input) {
return RET_OK;
}
int NNIEManager::SetAllocatorTensor(mindspore::MSTensor *tensor, SVP_SRC_BLOB_S *blob,
std::shared_ptr<Allocator> allocator) {
int step;
auto data_type = tensor->DataType();
if (data_type == DataType::kNumberTypeFloat32) {
step = sizeof(float);
} else if ((data_type == DataType::kNumberTypeUInt8) || (data_type == DataType::kNumberTypeInt8)) {
step = sizeof(unsigned char);
} else {
LOGE("Unsupported DataType!");
return RET_ERROR;
}
LOGI("\ninput %s :%d * %d = %d <-> %d", tensor->Name().c_str(), step, blob->unShape.stWhc.u32Width,
step * blob->unShape.stWhc.u32Width, blob->u32Stride);
if (blob->unShape.stWhc.u32Width * step == blob->u32Stride) {
if (((tensor->Data() == nullptr) || tensor->allocator() == allocator) && (blob->u64VirAddr != 0)) {
tensor->SetAllocator(allocator);
tensor->SetData(reinterpret_cast<void *>(blob->u64VirAddr));
LOGI("\nSet input %s allocator!", tensor->Name().c_str());
}
}
return RET_OK;
}
int NNIEManager::SetAllocatorInputs(std::vector<mindspore::MSTensor> *inputs, bool run_box,
std::shared_ptr<Allocator> allocator, unsigned int seg_id) {
size_t i, j, input_size = inputs->size();
if (seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
LOGE("seg num err!");
return RET_ERROR;
}
if (!run_box) {
if ((input_size < kNumInput2) || (input_size - 1) != nnie_cfg_.param_.model_->astSeg[seg_id].u16SrcNum) {
LOGE("Input Size Err!");
return RET_ERROR;
}
}
for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16SrcNum; i++) {
if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].src_node_[i]) {
continue;
}
j = GetFillIndex(*inputs, input_size - 1, nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
if (j == (input_size - 1)) {
if (run_box && (*inputs)[i].Name() == "proposal") {
continue;
} else {
j = i;
LOGI("input tensor name(%s) can't match wk node name(%s).", (*inputs)[i].Name().c_str(),
nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
}
}
SVP_SRC_BLOB_S *blob = &nnie_cfg_.param_.seg_data_[seg_id].src_[i];
SVP_BLOB_TYPE_E src_type = blob->enType;
if (src_type != SVP_BLOB_TYPE_SEQ_S32) {
SetAllocatorTensor(&(*inputs)[j], blob, allocator);
}
}
return RET_OK;
}
int NNIEManager::SetAllocatorOutputs(std::vector<mindspore::MSTensor> *outputs, bool run_box,
std::shared_ptr<Allocator> allocator, unsigned int seg_id) {
size_t i, j;
size_t output_size = outputs->size();
if (output_size != nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum) {
LOGE("seg%d: %d output tensors are required.", seg_id, nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum);
return RET_ERROR;
}
for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum; i++) {
if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].dst_node_[i]) {
continue;
}
j = GetFillIndex(*outputs, output_size, nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
if (j == output_size) {
j = i;
LOGI("output tensor name(%s) can't match wk node name(%s).", (*outputs)[j].Name().c_str(),
nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
}
auto output_data_type = (*outputs)[j].DataType();
if (output_data_type == DataType::kNumberTypeFloat32) {
SVP_SRC_BLOB_S *blob = &nnie_cfg_.param_.seg_data_[seg_id].dst_[i];
if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
LOGE("Nnie output type error");
return RET_ERROR;
} else if (SVP_BLOB_TYPE_SEQ_S32 != blob->enType) {
if ((blob->unShape.stWhc.u32Width * sizeof(float) == blob->u32Stride)) {
if ((((*outputs)[j].Data() == nullptr) || (*outputs)[j].allocator() == allocator) &&
(blob->u64VirAddr != 0)) {
(*outputs)[j].SetAllocator(allocator);
(*outputs)[j].SetData(reinterpret_cast<void *>(blob->u64VirAddr));
LOGI("\nSet output %s allocator!", (*outputs)[j].Name().c_str());
}
}
}
} else {
LOGE("Unsupported DataType!");
return RET_ERROR;
}
}
return RET_OK;
}
int NNIEManager::SetAllocator(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
std::shared_ptr<Allocator> allocator, unsigned int seg_id) {
bool run_box = false;
if (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI) {
run_box = true;
}
if (SetAllocatorInputs(inputs, run_box, allocator, seg_id) != RET_OK) {
LOGE("SetAllocatorInputs failed!");
return RET_ERROR;
}
if (SetAllocatorOutputs(outputs, run_box, allocator, seg_id) != RET_OK) {
LOGE("SetAllocatorOutputs failed!");
return RET_ERROR;
}
return RET_OK;
}
int NNIEManager::FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id) {
bool run_box = false;
size_t i, j;

View File

@@ -17,17 +17,33 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
#include <vector>
#include <string>
#include <map>
#include <memory>
#include "include/errorcode.h"
#include "include/api/types.h"
#include "include/api/allocator.h"
#include "src/nnie_common.h"
#include "src/nnie_cfg_parser.h"
namespace mindspore {
namespace nnie {
class NNIEManager {
public:
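// Managers are cached per model buffer, so each model gets its own
// NNIEManager instance; cached entries live for the lifetime of the process.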
static NNIEManager *GetInstance() {
static NNIEManager manager;
return &manager;
static NNIEManager *GetInstance(const void *model_buf) {
static std::map<const void *, NNIEManager *> managers_;
auto iter = managers_.find(model_buf);
if (iter != managers_.end()) {
return iter->second;
} else {
auto manager = new (std::nothrow) NNIEManager();
if (manager == nullptr) {
return manager;
} else {
managers_[model_buf] = manager;
return manager;
}
}
}
NNIEManager() {}
@@ -36,26 +52,65 @@ class NNIEManager {
int Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);
int CfgInit(int max_roi_num, int step, const std::vector<int> &core_id);
int CfgInit(const Flags &flags, int max_seg_id);
void SetInputNum(int max_input_num);
int SetAllocatorInputs(std::vector<mindspore::MSTensor> *inputs, bool run_box, std::shared_ptr<Allocator> allocator,
unsigned int seg_id);
int SetAllocatorOutputs(std::vector<mindspore::MSTensor> *outputs, bool run_box, std::shared_ptr<Allocator> allocator,
unsigned int seg_id);
int SetAllocator(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
std::shared_ptr<Allocator> allocator, unsigned int seg_id);
int FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id);
int Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg_id,
const std::vector<std::vector<int64_t>> &outputs_shape);
void Release();
void Release(bool resize_flag);
int LoadInputs(std::vector<mindspore::MSTensor> *inputs, std::shared_ptr<Allocator> allocator);
int LoadOutputs(std::vector<mindspore::MSTensor> *outputs, std::shared_ptr<Allocator> allocator);
int SetBlobAddr(SVP_SRC_BLOB_S *blob, HI_U64 virt, mindspore::MSTensor *tensor, std::shared_ptr<Allocator> allocator);
void SetMaxSegId(int max_id) {
if (max_id > max_seg_id_) {
max_seg_id_ = max_id;
}
}
inline int GetMaxSegId() { return max_seg_id_; }
inline Flags *GetFlags() { return &flags_; }
inline bool GetLoadModel() { return load_model_; }
void SetLoadModel(bool flag) { load_model_ = flag; }
private:
int SetAllocatorTensor(mindspore::MSTensor *tensor, SVP_SRC_BLOB_S *blob, std::shared_ptr<Allocator> allocator);
int GetOutputData(std::vector<mindspore::MSTensor> *outputs, const std::vector<std::vector<int64_t>> &outputs_shape,
bool run_box = false);
int MallocBlobData(SVP_SRC_BLOB_S *blob, mindspore::MSTensor *tensor, HI_U32 blob_size);
int FillRoiPooling(mindspore::MSTensor *input);
char *wk_model_ = nullptr;
int model_size_ = 0;
NnieRunCfg nnie_cfg_;
int max_seg_id_ = 0;
Flags flags_;
bool load_model_ = false;
std::vector<SVP_SRC_BLOB_S *> blobs_;
std::vector<mindspore::MSTensor *> tensors_;
};
} // namespace nnie
} // namespace mindspore

View File

@@ -16,6 +16,7 @@
#include "src/nnie_memory.h"
#include "include/hi_common.h"
#include "include/mpi_sys.h"
#include "src/nnie_common.h"
namespace mindspore {
namespace nnie {
@@ -31,5 +32,14 @@ HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_add
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size) {
return HI_MPI_SYS_MmzFlushCache(phy_addr, pv_vir_addr, size);
}
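// Resolves the physical address backing a virtual address. This succeeds
// only for MMZ-managed memory, which makes it a cheap test for whether a
// user buffer can be handed to NNIE directly.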
HI_S32 NnieGetVirMemInfo(HI_U64 pv_vir_addr, HI_U64 *phy_addr) {
SYS_VIRMEM_INFO_S mem_info;
HI_S32 ret = HI_MPI_SYS_GetVirMemInfo(NNIE_CONVERT_64BIT_ADDR(HI_VOID, pv_vir_addr), &mem_info);
if (ret == HI_SUCCESS) {
*phy_addr = mem_info.u64PhyAddr;
}
return ret;
}
} // namespace nnie
} // namespace mindspore

View File

@@ -43,6 +43,8 @@ HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_
HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size);
HI_S32 NnieGetVirMemInfo(HI_U64 pv_vir_addr, HI_U64 *phy_addr);
} // namespace nnie
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_

View File

@@ -34,15 +34,14 @@ struct Context::Data {
#ifdef PARALLEL_INFERENCE
int32_t thread_num = 8;
bool enable_parallel_ = false;
int affinity_mode_ = 1;
int32_t inter_op_parallel_num_ = 4;
#else
int32_t thread_num = 2;
bool enable_parallel_ = false;
int affinity_mode_ = 0;
int32_t inter_op_parallel_num_ = 1;
#endif
bool enable_parallel_ = false;
std::vector<int32_t> affinity_core_list_;
std::shared_ptr<Delegate> delegate = nullptr;
bool float_mode = false;

View File

@@ -245,6 +245,14 @@ Status Model::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor>
return impl_->Predict(inputs, outputs, before, after);
}
Status Model::Predict(const MSKernelCallBack &before, const MSKernelCallBack &after) {
if (impl_ == nullptr) {
MS_LOG(ERROR) << "Model implement is null.";
return kLiteNullptr;
}
return impl_->Predict(before, after);
}
Status Model::PredictWithPreprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs,
const MSKernelCallBack &before, const MSKernelCallBack &after) {
MS_LOG(ERROR) << "Unsupported Feature.";

View File

@@ -341,6 +341,32 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTen
return kSuccess;
}
Status ModelImpl::Predict(const MSKernelCallBack &before, const MSKernelCallBack &after) {
if (session_ == nullptr) {
MS_LOG(ERROR) << "Run graph failed.";
return kLiteError;
}
auto input_tensors = session_->GetInputs();
if (input_tensors.empty()) {
MS_LOG(ERROR) << "Failed to get input tensor.";
return kLiteError;
}
for (auto &input : input_tensors) {
if (input->data() == nullptr) {
MS_LOG(ERROR) << "Tensor " << input->tensor_name() << " has no data.";
return kLiteInputTensorError;
}
}
auto ret = RunGraph(before, after);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Run graph failed : " << ret;
return ret;
}
MS_LOG(DEBUG) << "Run graph success.";
return kSuccess;
}
std::vector<MSTensor> ModelImpl::GetInputs() {
std::vector<MSTensor> empty;
if (session_ == nullptr) {

View File

@@ -72,6 +72,8 @@ class ModelImpl {
Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs, const MSKernelCallBack &before,
const MSKernelCallBack &after);
Status Predict(const MSKernelCallBack &before, const MSKernelCallBack &after);
lite::LiteSession *CreateLiteSession(lite::InnerContext *context);
Status LoadConfig(const std::string &config_path);

View File

@@ -51,6 +51,14 @@ function Run_Hi3516() {
else
run_result='hi3516: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
echo './benchmark --modelFile='${basepath}'/'${model_name}'.ms --inputShapes='${input_shapes}' --warmUpLoopCount=0 --loopCount=2 --configFile='${NNIE_CONFIG_FILE} >> "${run_hi3516_log_file}"
./benchmark --modelFile=${basepath}/${model_name}.ms --inputShapes=${input_shapes} --warmUpLoopCount=0 --loopCount=2 --configFile=${NNIE_CONFIG_FILE} >> "${run_hi3516_log_file}"
if [ $? = 0 ]; then
run_result='hi3516: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='hi3516: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_nnie_config}
}
@@ -97,6 +105,7 @@ else
echo "Run benchmark failed"
MS_PRINT_TESTCASE_END_MSG
cat ${run_benchmark_result_file}
cat ${run_hi3516_log_file}
MS_PRINT_TESTCASE_END_MSG
rm -rf ${basepath}/*.ms
rm -rf ${basepath}/libmslite_nnie.so