!34113 [MS][LITE]Add Hi35xx keep original output
Merge pull request !34113 from gongdaguo1/add_hi3516_origin_output
commit 16f9b62536
@@ -161,6 +161,14 @@ class MS_API Model {
  Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
                 const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);

  /// \brief Inference model.
  ///
  /// \param[in] before CallBack before predict.
  /// \param[in] after CallBack after predict.
  ///
  /// \return Status.
  Status Predict(const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);

  /// \brief Train model by step.
  ///
  /// \param[in] before CallBack before predict.
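Note: the new zero-argument Predict() overload runs the graph on the tensors the model already owns, which is what the NNIE zero-copy path later in this diff relies on. A minimal usage sketch (assumed caller code, not from the patch; `model` is an already-built Model and `src` a user buffer):

  auto inputs = model.GetInputs();
  memcpy(inputs[0].MutableData(), src, inputs[0].DataSize());  // fill the model's own input buffer in place
  Status st = model.Predict();  // before/after callbacks default to nullptr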
@@ -0,0 +1,47 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef NNIE_SRC_CUSTOM_ALLOCATOR_H_
#define NNIE_SRC_CUSTOM_ALLOCATOR_H_

#include <memory>
#include <string>
#include <vector>
#include <mutex>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <atomic>
#include "include/api/allocator.h"
#include "include/hi_type.h"

namespace mindspore {
namespace nnie {
class CustomAllocator : public Allocator {
 public:
  CustomAllocator() {}
  ~CustomAllocator() override {}
  void *Malloc(size_t size) override { return nullptr; }
  void Free(void *ptr) override {}
  int RefCount(void *ptr) override { return 1; }
  int SetRefCount(void *ptr, int ref_count) override { return ref_count; }
  int DecRefCount(void *ptr, int ref_count) override { return 1; }
  int IncRefCount(void *ptr, int ref_count) override { return 1; }
};
}  // namespace nnie
}  // namespace mindspore

#endif  // NNIE_SRC_CUSTOM_ALLOCATOR_H_
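This allocator is deliberately a no-op: every method is stubbed so the runtime neither frees nor reference-counts the buffer behind a tensor. Tensors whose data aliases NNIE MMZ blob memory get tagged with it, along the lines of this sketch (`blob_vir_addr` stands for the blob's virtual address; the real call sites are in NNIEManager::SetBlobAddr later in this diff):

  auto nnie_allocator = std::make_shared<mindspore::nnie::CustomAllocator>();
  tensor->SetAllocator(nnie_allocator);  // runtime treats the buffer as externally owned
  tensor->SetData(reinterpret_cast<void *>(blob_vir_addr));  // tensor now aliases the NNIE blob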
@@ -20,9 +20,6 @@
#include "schema/model_generated.h"
#include "include/registry/register_kernel.h"
#include "include/errorcode.h"
#include "src/nnie_manager.h"
#include "src/nnie_print.h"
#include "src/nnie_cfg_parser.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
@@ -30,29 +27,29 @@ using mindspore::schema::PrimitiveType_Custom;

namespace mindspore {
namespace nnie {
bool CustomCPUKernel::load_model_ = false;
static std::shared_ptr<Allocator> kCustomAllocator = std::make_shared<nnie::CustomAllocator>();

int CustomCPUKernel::run_seg_ = 0;
bool CustomCPUKernel::roi_used_ = false;
int CustomCPUKernel::Prepare() {
  if (!load_model_) {
    Flags flags;
    if (flags.Init(*this) != RET_OK) {
  if ((manager_) == nullptr) {
    LOGE("manager_ is nullptr.");
    return RET_ERROR;
  }
  if (!manager_->GetLoadModel()) {
    if (manager_->GetFlags()->Init(*this) != RET_OK) {
      LOGE("Nnie config init fail.");
      return RET_ERROR;
    }

    if (nnie::NNIEManager::GetInstance()->CfgInit(flags.max_roi_num_, flags.time_step_, flags.core_ids_) != RET_OK) {
    if (manager_->CfgInit(*manager_->GetFlags(), manager_->GetMaxSegId()) != RET_OK) {
      LOGE("Nnie init cfg fail.");
      return RET_ERROR;
    }

    if (nnie::NNIEManager::GetInstance()->Init(reinterpret_cast<char *>(inputs_[inputs_.size() - 1].MutableData()),
                                               static_cast<int>(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) {
    if (manager_->Init(reinterpret_cast<char *>(inputs_[inputs_.size() - 1].MutableData()),
                       static_cast<int>(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) {
      LOGI("Load WK Model Fail.");
      return RET_OK;
    }
    load_model_ = true;
    manager_->SetLoadModel(true);
  }
  outputs_shapes_.resize(outputs_.size());
  for (size_t i = 0; i < outputs_.size(); i++) {
@@ -62,38 +59,51 @@ int CustomCPUKernel::Prepare() {
}

int CustomCPUKernel::ReSize() {
  if (load_model_) {
    nnie::NNIEManager::GetInstance()->Release();
    load_model_ = false;
  if (manager_->GetLoadModel() && seg_id() == 0) {
    manager_->Release(true);
    manager_->SetLoadModel(false);
  }

  return Prepare();
}

int CustomCPUKernel::Execute() {
  if (!load_model_) {
  if (!manager_->GetLoadModel()) {
    LOGE("WK Model is not load.");
    return RET_ERROR;
  }
  run_seg_ = seg_id_;
  Flags *flags = manager_->GetFlags();
  if (flags->keep_origin_output_) {
    if (seg_id_ == 0) {
      if (manager_->LoadInputs(&inputs_, kCustomAllocator) != RET_OK) {
        LOGE("Unable to find the physical address corresponding to the input tensor.");
        return RET_ERROR;
      }
    }
    if (seg_id_ == manager_->GetMaxSegId()) {
      if (manager_->LoadOutputs(&outputs_, kCustomAllocator) != RET_OK) {
        LOGE("Unable to find the physical address corresponding to the output tensor.");
        return RET_ERROR;
      }
    }
  }

  if (nnie::NNIEManager::GetInstance()->FillData(&inputs_, run_seg_) != RET_OK) {
  if (manager_->FillData(&inputs_, seg_id_) != RET_OK) {
    LOGE("Fail Fill Data.");
    return RET_ERROR;
  }

  if (nnie::NNIEManager::GetInstance()->Run(&outputs_, run_seg_, outputs_shapes_) != RET_OK) {
  if (manager_->Run(&outputs_, seg_id_, outputs_shapes_) != RET_OK) {
    LOGE("Fail WK Run.");
    return RET_ERROR;
  }
  run_seg_++;
  return RET_OK;
}

CustomCPUKernel::~CustomCPUKernel() {
  if (load_model_) {
    nnie::NNIEManager::GetInstance()->Release();
    load_model_ = false;
  if (manager_->GetLoadModel()) {
    manager_->Release(false);
    manager_->SetLoadModel(false);
  }
}
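With KeepOriginalOutput enabled, the kernel binds user tensors to NNIE blobs once per graph: inputs at segment 0, outputs at the last segment, while every segment still goes through FillData and Run. A condensed sketch of that gate (names taken from this diff; error logging and the outputs-shape argument elided, so treat it as illustrative only):

  int GateIoBinding(nnie::NNIEManager *m, int seg_id, std::vector<mindspore::MSTensor> *in,
                    std::vector<mindspore::MSTensor> *out, std::shared_ptr<Allocator> alloc) {
    if (!m->GetFlags()->keep_origin_output_) return RET_OK;  // legacy copy path unchanged
    if (seg_id == 0 && m->LoadInputs(in, alloc) != RET_OK) return RET_ERROR;                   // first segment maps inputs
    if (seg_id == m->GetMaxSegId() && m->LoadOutputs(out, alloc) != RET_OK) return RET_ERROR;  // last segment maps outputs
    return RET_OK;
  }

Note also the asymmetry in Release(): ReSize() passes true so resized tensors drop their aliased blob pointers, while the destructor passes false and leaves the tensor pointers untouched.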
@@ -159,7 +169,13 @@ std::shared_ptr<mindspore::kernel::Kernel> CustomCreateKernel(const std::vector<
      forward_bbox = true;
    }
  }
  auto kernel = std::make_shared<CustomCPUKernel>(ndims, forward_bbox, inputs, outputs, primitive, ctx);
  auto model_buf = static_cast<const void *>(inputs[inputs.size() - 1].Data().get());
  auto manager = nnie::NNIEManager::GetInstance(model_buf);
  if ((manager) == nullptr) {
    LOGE("malloc NNIEManager failed.");
    return nullptr;
  }
  auto kernel = std::make_shared<CustomCPUKernel>(manager, ndims, forward_bbox, inputs, outputs, primitive, ctx);
  if (kernel == nullptr) {
    LOGE("new custom kernel is nullptr");
    return nullptr;
@@ -19,10 +19,16 @@

#include <vector>
#include <string>
#include <memory>
#include "include/schema/model_generated.h"
#include "include/context.h"
#include "include/api/kernel.h"
#include "src/custom_infer.h"
#include "include/hi_type.h"
#include "src/nnie_cfg_parser.h"
#include "src/nnie_manager.h"
#include "src/nnie_print.h"
#include "src/custom_allocator.h"

using mindspore::kernel::Kernel;
using mindspore::tensor::MSTensor;
@@ -30,12 +36,14 @@ namespace mindspore {
namespace nnie {
class CustomCPUKernel : public Kernel {
 public:
  CustomCPUKernel(int seg_id, bool forward_bbox, const std::vector<MSTensor> &inputs,
  CustomCPUKernel(nnie::NNIEManager *manager, int seg_id, bool forward_bbox, const std::vector<MSTensor> &inputs,
                  const std::vector<MSTensor> &outputs, const mindspore::schema::Primitive *primitive,
                  const mindspore::Context *ctx)
      : Kernel(inputs, outputs, primitive, ctx), seg_id_(seg_id), forward_bbox_(forward_bbox) {
    if (forward_bbox) {
      roi_used_ = true;
      : Kernel(inputs, outputs, primitive, ctx), manager_(manager), seg_id_(seg_id), forward_bbox_(forward_bbox) {
    if ((manager_) == nullptr) {
      LOGE("manager_ is nullptr.");
    } else {
      manager_->SetMaxSegId(seg_id);
    }
  }
@@ -54,9 +62,7 @@ class CustomCPUKernel {
  void set_forward_bbox(bool flag) { forward_bbox_ = flag; }

 private:
  static bool load_model_;
  static int run_seg_;
  static bool roi_used_;
  nnie::NNIEManager *manager_ = nullptr;
  int seg_id_ = 0;
  bool forward_bbox_ = false;
  std::vector<std::vector<int64_t>> outputs_shapes_;
@@ -34,6 +34,7 @@ namespace {
constexpr auto kTimeStep = "TimeStep";
constexpr auto kMazRoiNum = "MaxROINum";
constexpr auto kCoreIds = "CoreIds";
constexpr auto kKeepOrigin = "KeepOriginalOutput";
constexpr auto DELIM = ",";
constexpr int MAX_CORE_ID = 7;
}  // namespace
@@ -46,25 +47,49 @@ void PrintInvalidChar(const std::string &key, const std::string &dat) {
  LOGE(message.c_str());
}

int Flags::Init(const kernel::Kernel &kernel) {
  auto nnie_arg = kernel.GetConfig("nnie");
  if (nnie_arg.find(kTimeStep) != nnie_arg.end()) {
    if (IsValidUnsignedNum(nnie_arg.at(kTimeStep)) == true) {
      this->time_step_ = stoi(nnie_arg.at(kTimeStep));
int Flags::ParserInt(const std::map<std::string, std::string> &nnie_arg, const std::string key, int *val) {
  auto iter = nnie_arg.find(key);
  if (iter != nnie_arg.end()) {
    auto str = iter->second;
    if (IsValidUnsignedNum(str) == true) {
      *val = stoi(str);
    } else {
      PrintInvalidChar(kTimeStep, nnie_arg.at(kTimeStep));
      PrintInvalidChar(key, str);
      return RET_ERROR;
    }
  }
  return RET_OK;
}

  if (nnie_arg.find(kMazRoiNum) != nnie_arg.end()) {
    if (IsValidUnsignedNum(nnie_arg.at(kMazRoiNum)) == true) {
      this->max_roi_num_ = stoi(nnie_arg.at(kMazRoiNum));
int Flags::ParserBool(const std::map<std::string, std::string> &nnie_arg, const std::string key, bool *val) {
  auto iter = nnie_arg.find(key);
  if (iter != nnie_arg.end()) {
    auto str = iter->second;
    if (str.find("on") != std::string::npos) {
      *val = true;
    } else if (str.find("off") != std::string::npos) {
      *val = false;
    } else {
      PrintInvalidChar(kMazRoiNum, nnie_arg.at(kMazRoiNum));
      PrintInvalidChar(key, str);
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int Flags::Init(const kernel::Kernel &kernel) {
  auto nnie_arg = kernel.GetConfig("nnie");
  if (ParserInt(nnie_arg, kTimeStep, &this->time_step_) != RET_OK) {
    return RET_ERROR;
  }

  if (ParserInt(nnie_arg, kMazRoiNum, &this->max_roi_num_) != RET_OK) {
    return RET_ERROR;
  }

  if (ParserBool(nnie_arg, kKeepOrigin, &this->keep_origin_output_) != RET_OK) {
    return RET_ERROR;
  }

  if (nnie_arg.find(kCoreIds) != nnie_arg.end()) {
    auto ids = nnie_arg.at(kCoreIds);

@@ -85,6 +110,7 @@ int Flags::Init(const kernel::Kernel &kernel) {
      return RET_ERROR;
    }
  }

  return RET_OK;
}
}  // namespace nnie
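For reference, Flags::Init() reads its keys from the kernel's "nnie" config section, so a benchmark config file passed via --configFile could plausibly look like the following (the [nnie] section name is inferred from GetConfig("nnie") and is an assumption; ParserBool expects on/off values):

  [nnie]
  TimeStep=1
  MaxROINum=300
  CoreIds=0
  KeepOriginalOutput=on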
@@ -16,10 +16,17 @@
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
#include <vector>
#include <map>
#include <string>
#include "include/api/kernel.h"
#include "include/hi_type.h"

namespace mindspore {
namespace nnie {
typedef struct {
  HI_U64 phy_;
  HI_U32 size_;
} MEM_ITEM;
/**
 * Flags is a config container.
 * Member objects:

@@ -39,6 +46,11 @@ class Flags {
  int time_step_{1};
  int max_roi_num_{300};
  std::vector<int> core_ids_{0};
  bool keep_origin_output_{false};

 private:
  int ParserInt(const std::map<std::string, std::string> &nnie_arg, const std::string key, int *val);
  int ParserBool(const std::map<std::string, std::string> &nnie_arg, const std::string key, bool *val);
};
}  // namespace nnie
}  // namespace mindspore
@@ -26,6 +26,7 @@ using mindspore::lite::RET_OK;
namespace mindspore {
namespace nnie {
constexpr int kSleepUs = 100;
constexpr int kCompressionWidth = 2;
static void NnieParamRelease(NnieParam *nnie_param) {
  if (nnie_param == nullptr) {
    return;
@@ -141,7 +142,8 @@ static void FillForwardInfo(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
}

static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32 total_step, SVP_BLOB_S blob[],
                           HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool *mem_alloc = nullptr) {
                           HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool malloc_allow,
                           bool *mem_alloc = nullptr) {
  HI_U32 i = 0;
  HI_U32 size;
  HI_U32 stride;
@@ -173,7 +175,9 @@ static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32
        blob_size[i] = 0;
      }
    }
    *total_size += blob_size[i];
    if (malloc_allow) {
      *total_size += blob_size[i];
    }
    blob[i].u32Stride = stride;
  }
}
@@ -208,18 +212,71 @@ static int GetTaskAndBlobBufSize(NnieCfg *nnie_cfg, NnieParam *nnie_param, HI_U3
                j);
      }
    }
    bool malloc_allow = (!nnie_cfg->pass_align16_io_) || i != 0;
    GetBlobMemSize(&(nnie_param->model_->astSeg[i].astSrcNode[0]), nnie_param->model_->astSeg[i].u16SrcNum, total_step,
                   &(nnie_param->seg_data_[i].src_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].src_size_[0]),
                   &(nnie_param->mem_cfg_.seg_[i].src_node_[0]));
                   malloc_allow, &(nnie_param->mem_cfg_.seg_[i].src_node_[0]));

    malloc_allow = (!nnie_cfg->pass_align16_io_) || (i + 1) != nnie_param->model_->u32NetSegNum;
    GetBlobMemSize(&(nnie_param->model_->astSeg[i].astDstNode[0]), nnie_param->model_->astSeg[i].u16DstNum, total_step,
                   &(nnie_param->seg_data_[i].dst_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].dst_size_[0]));
                   &(nnie_param->seg_data_[i].dst_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].dst_size_[0]),
                   malloc_allow);
  }
  return RET_OK;
}

static int NnieSetBlobAddr(HI_U64 *phy_addr, HI_U8 **vir_addr, NnieParam *nnie_param, NnieBlobSize *blob_size,
                           bool pass_align16_io) {
  HI_U32 i, j;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    if ((!pass_align16_io) || i != 0) {
      for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
        if (j != 0) {
          *phy_addr += blob_size[i].src_size_[j - 1];
          *vir_addr += blob_size[i].src_size_[j - 1];
        }
        if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) {
          if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param,
                                    &(nnie_param->seg_data_[i].src_[j]))) {
            LOGE("ConnectNnieInnerNode failed! ");
            return RET_ERROR;
          }
        } else {
          nnie_param->seg_data_[i].src_[j].u64PhyAddr = *phy_addr;
          nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)*vir_addr;
        }
      }
      *phy_addr += blob_size[i].src_size_[j - 1];
      *vir_addr += blob_size[i].src_size_[j - 1];
    } else {
      for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
        nnie_param->seg_data_[i].src_[j].u64PhyAddr = 0;
        nnie_param->seg_data_[i].src_[j].u64VirAddr = 0;
      }
    }
    if ((!pass_align16_io) || (i + 1) != nnie_param->model_->u32NetSegNum) {
      for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
        if (j != 0) {
          *phy_addr += blob_size[i].dst_size_[j - 1];
          *vir_addr += blob_size[i].dst_size_[j - 1];
        }
        nnie_param->seg_data_[i].dst_[j].u64PhyAddr = *phy_addr;
        nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)*vir_addr;
      }
      *phy_addr += blob_size[i].dst_size_[j - 1];
      *vir_addr += blob_size[i].dst_size_[j - 1];
    } else {
      for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
        nnie_param->seg_data_[i].dst_[j].u64PhyAddr = 0;
        nnie_param->seg_data_[i].dst_[j].u64VirAddr = 0;
      }
    }
  }
  return RET_OK;
}

static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
  HI_U32 i, j;
  HI_U32 i;
  HI_U32 total_size = 0, total_task_buf_size = 0, tmp_buf_size_ = 0;
  HI_S32 ret = HI_SUCCESS;
  HI_U32 off_set = 0;
@@ -288,36 +345,9 @@ static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) {

  phy_addr = phy_addr + total_task_buf_size + tmp_buf_size_;
  vir_addr = vir_addr + total_task_buf_size + tmp_buf_size_;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
      if (j != 0) {
        phy_addr += blob_size[i].src_size_[j - 1];
        vir_addr += blob_size[i].src_size_[j - 1];
      }
      if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) {
        if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param,
                                  &(nnie_param->seg_data_[i].src_[j]))) {
          LOGE("ConnectNnieInnerNode failed! ");
          return RET_ERROR;
        }
      } else {
        nnie_param->seg_data_[i].src_[j].u64PhyAddr = phy_addr;
        nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
      }
    }
    phy_addr += blob_size[i].src_size_[j - 1];
    vir_addr += blob_size[i].src_size_[j - 1];

    for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
      if (j != 0) {
        phy_addr += blob_size[i].dst_size_[j - 1];
        vir_addr += blob_size[i].dst_size_[j - 1];
      }
      nnie_param->seg_data_[i].dst_[j].u64PhyAddr = phy_addr;
      nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
    }
    phy_addr += blob_size[i].dst_size_[j - 1];
    vir_addr += blob_size[i].dst_size_[j - 1];
  if (NnieSetBlobAddr(&phy_addr, &vir_addr, nnie_param, blob_size, nnie_cfg->pass_align16_io_) != RET_OK) {
    LOGE("SetBlobAddr failed!");
    return RET_ERROR;
  }
  if (has_roi) {
    nnie_param->rpn_bbox_.u64PhyAddr = phy_addr;
@@ -536,70 +566,108 @@ int FillByFloat(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_F
  return RET_OK;
}

static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                           int size) {
  HI_U32 i, j, n, ret;
  HI_U32 height, width, channel, stride, dim;
  HI_U8 *input_addr_u8 = nullptr;
  HI_S32 *input_addr_s32 = nullptr;
  HI_U32 *step_addr_u32 = nullptr;
  HI_FLOAT *float_src_data = nullptr;
  HI_U8 *u8_src_data = nullptr;
static int NnieFillSrcDataSeq(NnieCfg *nnie_cfg, SVP_SRC_BLOB_S *blob, HI_U32 input_size) {
  HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
  HI_U32 dim = blob->unShape.stSeq.u32Dim;
  HI_U32 stride = blob->u32Stride;
  HI_U32 i, j, n;
  HI_U32 total_step_num = 0;
  HI_U32 input_size = 1;
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_];
  for (n = 0; n < (HI_U32)size; n++) {
    input_size *= shape[n];
  }
  input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
  u8_src_data = reinterpret_cast<unsigned char *>(nnie_cfg->data_ptr_);
  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
    dim = blob->unShape.stSeq.u32Dim;
    stride = blob->u32Stride;
  HI_U8 *input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  HI_S32 *input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  HI_FLOAT *float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);

    for (n = 0; n < blob->u32Num; n++) {
  for (n = 0; n < blob->u32Num; n++) {
    total_step_num += *(step_addr_u32 + n);
  }

  if (input_size != total_step_num * dim) {
    LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim);
    return RET_ERROR;
  }
  for (n = 0; n < blob->u32Num; n++) {
    for (i = 0; i < *(step_addr_u32 + n); i++) {
      for (j = 0; j < dim; j++) {
        input_addr_s32[j] = (float_src_data[j] * NNIE_QUANT_BASE);
      }
      input_addr_u8 += stride;
      input_addr_s32 = reinterpret_cast<HI_S32 *>(input_addr_u8);
      float_src_data += dim;
    }
  }
  NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride);
  return RET_OK;
}

HI_U32 GetBlobSize(const SVP_SRC_BLOB_S &blob) {
  if (SVP_BLOB_TYPE_SEQ_S32 == blob.enType) {
    HI_U32 stride = blob.u32Stride;
    HI_U32 total_step_num = 0;
    HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob.unShape.stSeq.u64VirAddrStep);
    size_t n;
    for (n = 0; n < blob.u32Num; n++) {
      total_step_num += *(step_addr_u32 + n);
    }
    return total_step_num * stride;
  }

    if (input_size != total_step_num * dim) {
      LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim);
      return RET_ERROR;
    }
    for (n = 0; n < blob->u32Num; n++) {
      for (i = 0; i < *(step_addr_u32 + n); i++) {
        for (j = 0; j < dim; j++) {
          input_addr_s32[j] = (float_src_data[j] * NNIE_QUANT_BASE);
        }
        input_addr_u8 += stride;
        input_addr_s32 = reinterpret_cast<HI_S32 *>(input_addr_u8);
        float_src_data += dim;
      }
    }
    NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride);
  HI_U32 stride = blob.u32Stride;
  HI_U32 height = blob.unShape.stWhc.u32Height;
  HI_U32 channel = blob.unShape.stWhc.u32Chn;
  if (SVP_BLOB_TYPE_YVU420SP == blob.enType) {
    return blob.u32Num * static_cast<HI_U32>(channel * height / kCompressionWidth) * stride;
  } else if (SVP_BLOB_TYPE_YVU422SP == blob.enType) {
    return blob.u32Num * height * kCompressionWidth * stride;
  } else {
    height = blob->unShape.stWhc.u32Height;
    width = blob->unShape.stWhc.u32Width;
    channel = blob->unShape.stWhc.u32Chn;
    stride = blob->u32Stride;
    if (SVP_BLOB_TYPE_YVU420SP == blob->enType) {
      ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast<HI_U32>(channel * height / 2), width, stride,
                               u8_src_data, input_addr_u8);
    } else if (SVP_BLOB_TYPE_YVU422SP == blob->enType) {
      ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8);
    } else {
      if (SVP_BLOB_TYPE_U8 == blob->enType) {
        ret =
          FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8);
    return blob.u32Num * channel * height * stride;
  }
}

static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                           int size) {
  HI_U32 i, ret;
  HI_U32 input_size = 1;
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_];
  for (i = 0; i < (HI_U32)size; i++) {
    input_size *= shape[i];
  }

  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    return NnieFillSrcDataSeq(nnie_cfg, blob, input_size);
  } else {
    HI_U8 *input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
    HI_S32 *input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
    HI_FLOAT *float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
    HI_U8 *u8_src_data = reinterpret_cast<unsigned char *>(nnie_cfg->data_ptr_);
    HI_U32 height = blob->unShape.stWhc.u32Height;
    HI_U32 width = blob->unShape.stWhc.u32Width;
    HI_U32 channel = blob->unShape.stWhc.u32Chn;
    HI_U32 stride = blob->u32Stride;
    if (input_addr_u8 == u8_src_data) {
      if (blob->enType == SVP_BLOB_TYPE_S32) {
        for (i = 0; i < input_size; i++) {
          input_addr_s32[i] = float_src_data[i] * NNIE_QUANT_BASE;
        }
      } else {
        ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32,
                          input_addr_u8);
        LOGI("\ninput no memcpy");
      }
    } else {
      if (SVP_BLOB_TYPE_YVU420SP == blob->enType) {
        ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast<HI_U32>(channel * height / 2), width, stride,
                                 u8_src_data, input_addr_u8);
      } else if (SVP_BLOB_TYPE_YVU422SP == blob->enType) {
        ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8);
      } else {
        if (SVP_BLOB_TYPE_U8 == blob->enType) {
          ret =
            FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8);
        } else {
          ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32,
                            input_addr_u8);
        }
      }
      if (ret != RET_OK) {
        return ret;
      }
    }
    if (ret != RET_OK) {
      return ret;
    }
    NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr),
                      blob->u32Num * channel * height * stride);
@@ -608,42 +676,32 @@ static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataInd
  return RET_OK;
}

static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                          int size) {
static int NnieGetDstDataSEQ(SVP_SRC_BLOB_S *blob, HI_U32 input_num, NnieDataIndex *input_data_idx,
                             HI_FLOAT *float_dst_data) {
  HI_U32 i, j, n;
  HI_U32 height, width, channel, stride, dim;
  HI_U8 *output_addr_u8 = nullptr;
  HI_S32 *output_addr_s32 = nullptr;
  HI_U32 *step_addr_u32 = nullptr;
  HI_FLOAT *float_dst_data = nullptr;
  HI_U32 dim = blob->unShape.stSeq.u32Dim;
  HI_U32 stride = blob->u32Stride;
  HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
  HI_U32 total_step_num = 0;
  HI_U32 input_num = 1;
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_];
  for (n = 0; n < (HI_U32)size; n++) {
    input_num *= shape[n];
  }
  HI_U8 *output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  HI_S32 *output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);

  if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
    LOGE("Nnie output type error");
  for (n = 0; n < blob->u32Num; n++) {
    total_step_num += *(step_addr_u32 + n);
  }
  if (input_num != total_step_num * dim) {
    LOGE("input shape");
    return RET_ERROR;
  }

  output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  float_dst_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);

  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    dim = blob->unShape.stSeq.u32Dim;
    stride = blob->u32Stride;
    step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);

  if (input_data_idx->seg_idx_ == input_data_idx->max_seg_id_) {
    for (n = 0; n < blob->u32Num; n++) {
      total_step_num += *(step_addr_u32 + n);
    }
    if (input_num != total_step_num * dim) {
      LOGE("input shape");
      return RET_ERROR;
      for (i = 0; i < *(step_addr_u32 + n); i++) {
        memcpy(float_dst_data, output_addr_u8, dim * sizeof(float));
        float_dst_data += dim;
        output_addr_u8 += stride;
      }
    }
  } else {
    for (n = 0; n < blob->u32Num; n++) {
      for (i = 0; i < *(step_addr_u32 + n); i++) {
        for (j = 0; j < dim; j++) {
@@ -654,23 +712,67 @@ static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataInde
        float_dst_data += dim;
      }
    }
  } else {
    height = blob->unShape.stWhc.u32Height;
    width = blob->unShape.stWhc.u32Width;
    channel = blob->unShape.stWhc.u32Chn;
    stride = blob->u32Stride;
    if (input_num != height * channel * width * blob->u32Num) {
      LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num);
  }
  return RET_OK;
}
static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                          int size) {
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_];
  HI_U32 input_num = 1;
  for (HI_U32 i = 0; i < (HI_U32)size; i++) {
    input_num *= shape[i];
  }
  if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
    LOGE("Nnie output type error");
    return RET_ERROR;
  }
  HI_FLOAT *float_dst_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    if (NnieGetDstDataSEQ(blob, input_num, input_data_idx, float_dst_data) != RET_OK) {
      LOGE("NnieGetDstDataSEQ error.");
      return RET_ERROR;
    }
    for (n = 0; n < blob->u32Num; n++) {
      for (i = 0; i < channel * height; i++) {
        for (j = 0; j < width; j++) {
          float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE;
  } else {
    HI_U8 *output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
    HI_S32 *output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
    if (float_dst_data == reinterpret_cast<float *>(output_addr_s32)) {
      if (input_data_idx->seg_idx_ != input_data_idx->max_seg_id_) {
        for (HI_U32 i = 0; i < input_num; i++) {
          float_dst_data[i] = (HI_FLOAT)output_addr_s32[i] / NNIE_QUANT_BASE;
        }
      } else {
        LOGI("\noutput no memcpy");
      }
    } else {
      HI_U32 height = blob->unShape.stWhc.u32Height;
      HI_U32 width = blob->unShape.stWhc.u32Width;
      HI_U32 channel = blob->unShape.stWhc.u32Chn;
      HI_U32 stride = blob->u32Stride;
      if (input_num != height * channel * width * blob->u32Num) {
        LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num);
        return RET_ERROR;
      }
      if (input_data_idx->seg_idx_ == input_data_idx->max_seg_id_) {
        if (nnie_cfg->pass_align16_io_) {
          memcpy(float_dst_data, output_addr_u8, blob->u32Num * channel * height * stride);
        } else {
          for (HI_U32 i = 0; i < (blob->u32Num * channel * height); i++) {
            memcpy(float_dst_data, output_addr_u8, width * sizeof(float));
            float_dst_data += width;
            output_addr_u8 += stride;
          }
        }
      } else {
        for (HI_U32 n = 0; n < blob->u32Num; n++) {
          for (HI_U32 i = 0; i < channel * height; i++) {
            for (HI_U32 j = 0; j < width; j++) {
              float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE;
            }
            output_addr_u8 += stride;
            output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
            float_dst_data += width;
          }
        }
        output_addr_u8 += stride;
        output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
        float_dst_data += width;
      }
    }
  }
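The copy-versus-alias choice above comes down to NNIE's 16-byte row stride. A small worked illustration (example numbers, not taken from the patch):

  constexpr HI_U32 kAlign16 = 16;
  HI_U32 width_bytes = 10 * sizeof(float);                             // 40 payload bytes per row
  HI_U32 stride = (width_bytes + kAlign16 - 1) / kAlign16 * kAlign16;  // padded up to 48
  // stride != width_bytes, so each row needs its own memcpy (the inner loops above);
  // with pass_align16_io_ the layout guarantees stride == width_bytes, so one bulk
  // memcpy (or no copy at all) hands the blob straight to the user tensor.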
@@ -19,12 +19,14 @@
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include "include/api/types.h"
#include "include/mpi_vb.h"
#include "include/hi_comm_svp.h"
#include "include/hi_nnie.h"
#include "include/mpi_nnie.h"
#include "include/ir/dtype/type_id.h"
#include "src/nnie_cfg_parser.h"

namespace mindspore {
namespace nnie {
@@ -70,9 +72,11 @@ typedef struct {
  SVP_NNIE_FORWARD_CTRL_S forward_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
  SVP_NNIE_FORWARD_WITHBBOX_CTRL_S forward_with_bbox_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
  NNIEMemCfg mem_cfg_;
  bool get_mem_strong;
} NnieParam;

typedef struct {
  bool pass_align16_io_;
  HI_VOID *data_ptr_;
  HI_U32 max_input_num_;
  HI_U32 max_roi_num_;
@@ -85,6 +89,7 @@ typedef struct {
typedef struct {
  HI_U32 seg_idx_;
  HI_U32 node_idx_;
  HI_U32 max_seg_id_;
} NnieDataIndex;

typedef struct {
@@ -110,6 +115,8 @@ int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box);
int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size, int id);

int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index);

HI_U32 GetBlobSize(const SVP_SRC_BLOB_S &blob);
}  // namespace nnie
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
@@ -14,6 +14,9 @@
 * limitations under the License.
 */
#include <cstring>
#include <string>
#include <map>
#include <memory>
#include "src/nnie_manager.h"
#include "src/nnie_common.h"
#include "src/nnie_print.h"
@@ -24,26 +27,29 @@ using mindspore::lite::RET_OK;

namespace mindspore {
namespace nnie {
constexpr int kUINT16_MAX = 65535;
constexpr int kNumInput2 = 2;
int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector<int> &core_id) {
int NNIEManager::CfgInit(const Flags &flags, int max_seg_id) {
  memset(&nnie_cfg_, 0, sizeof(NnieRunCfg));

  nnie_cfg_.cfg_.max_roi_num_ = max_roi_num;

  nnie_cfg_.cfg_.step_ = step;
  if (core_id.size() == 1) {
  nnie_cfg_.cfg_.pass_align16_io_ = flags.keep_origin_output_;
  nnie_cfg_.param_.get_mem_strong = false;
  nnie_cfg_.run_idx_.max_seg_id_ = flags.keep_origin_output_ ? max_seg_id + 1 : kUINT16_MAX;
  nnie_cfg_.cfg_.max_roi_num_ = flags.max_roi_num_;
  nnie_cfg_.cfg_.step_ = flags.time_step_;
  if (flags.core_ids_.size() == 1) {
    for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM; i++) {
      if (core_id[0] < SVP_NNIE_ID_BUTT) {
        nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[0];
      if (flags.core_ids_[0] < SVP_NNIE_ID_BUTT) {
        nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)flags.core_ids_[0];
      } else {
        LOGE("nnie core num toobig.\n");
        return RET_ERROR;
      }
    }
  }
  for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < core_id.size(); i++) {
    if (core_id[i] < SVP_NNIE_ID_BUTT) {
      nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[i];
  for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < flags.core_ids_.size(); i++) {
    if (flags.core_ids_[i] < SVP_NNIE_ID_BUTT) {
      nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)flags.core_ids_[i];
    } else {
      LOGE("nnie core num toobig.\n");
      return RET_ERROR;
@@ -51,6 +57,108 @@ int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector<int> &core
  }
  return RET_OK;
}

int NNIEManager::MallocBlobData(SVP_SRC_BLOB_S *blob, mindspore::MSTensor *tensor, HI_U32 blob_size) {
  auto ret = NnieMemMallocCached(tensor->Name().c_str(), nullptr, reinterpret_cast<HI_U64 *>(&blob->u64PhyAddr),
                                 reinterpret_cast<void **>(&blob->u64VirAddr), blob_size);
  if (HI_SUCCESS != ret) {
    LOGE("Error,MallocBlobData failed!");
    return RET_ERROR;
  }
  blobs_.push_back(blob);
  tensors_.push_back(tensor);
  return RET_OK;
}

int NNIEManager::SetBlobAddr(SVP_SRC_BLOB_S *blob, HI_U64 virt, mindspore::MSTensor *tensor,
                             std::shared_ptr<Allocator> allocator) {
  HI_U32 blob_size = GetBlobSize(*blob);
  if (virt == 0) {
    auto iter = std::find(blobs_.begin(), blobs_.end(), blob);
    if (iter == blobs_.end()) {
      if (MallocBlobData(blob, tensor, blob_size) != RET_OK) {
        LOGE("Failed to malloc.");
        return RET_ERROR;
      }
    }
    tensor->SetAllocator(allocator);
    tensor->SetData(reinterpret_cast<void *>(blob->u64VirAddr));
    LOGI("\nSet %s allocator!", tensor->Name().c_str());
  } else {
    auto ret = NnieGetVirMemInfo(virt, &blob->u64PhyAddr);
    if (ret == HI_SUCCESS) {
      blob->u64VirAddr = virt;
      LOGI("Get physical address %llu.", blob->u64PhyAddr);
    } else {
      auto iter = std::find(blobs_.begin(), blobs_.end(), blob);
      if (iter == blobs_.end()) {
        if (MallocBlobData(blob, tensor, blob_size) != RET_OK) {
          LOGE("Error, tensor data pointer is not MMZ memory, failed to malloc.");
          return RET_ERROR;
        }
      }
    }
  }
  return RET_OK;
}

int NNIEManager::LoadInputs(std::vector<mindspore::MSTensor> *inputs, std::shared_ptr<Allocator> allocator) {
  size_t input_size = inputs->size();
  if ((input_size < kNumInput2) || (input_size - 1) != nnie_cfg_.param_.model_->astSeg[0].u16SrcNum) {
    LOGE("Input Size Err!");
    return RET_ERROR;
  }

  for (size_t i = 0; i < nnie_cfg_.param_.model_->astSeg[0].u16SrcNum; i++) {
    size_t j = GetFillIndex(*inputs, input_size - 1, nnie_cfg_.param_.model_->astSeg[0].astSrcNode[i].szName);
    if (j == (input_size - 1)) {
      j = i;
      LOGI("input tensor name(%s) can't match wk node name(%s).", (*inputs)[j].Name().c_str(),
           nnie_cfg_.param_.model_->astSeg[0].astSrcNode[i].szName);
    }
    HI_U64 virt = (HI_U64)(HI_UL)((*inputs)[j].Data().get());
    auto blob = &nnie_cfg_.param_.seg_data_[0].src_[i];
    if (SetBlobAddr(blob, virt, &(*inputs)[j], allocator) != RET_OK) {
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int NNIEManager::LoadOutputs(std::vector<mindspore::MSTensor> *outputs, std::shared_ptr<Allocator> allocator) {
  int output_size = outputs->size();
  HI_U32 seg_id = nnie_cfg_.model_.model_.u32NetSegNum - 1;
  if (output_size != nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum) {
    LOGE("seg%d: %d output tensors are required, but there are %d outputs.", nnie_cfg_.run_idx_.seg_idx_ - 1,
         nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum, output_size);
    return RET_ERROR;
  }
  if (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI) {
    LOGE("Unsupported use PassAlign16InOutput!");
    return RET_ERROR;
  }

  for (int i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum; i++) {
    int j = GetFillIndex(*outputs, output_size, nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
    if (j == output_size) {
      j = i;
      LOGI("output tensor name(%s) can't match wk node name(%s).", (*outputs)[j].Name().c_str(),
           nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
    }

    SVP_SRC_BLOB_S *blob = &nnie_cfg_.param_.seg_data_[seg_id].dst_[i];
    if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
      LOGE("Nnie output type error");
      return RET_ERROR;
    }
    HI_U64 virt = (HI_U64)(HI_UL)((*outputs)[j].Data().get());
    if (SetBlobAddr(blob, virt, &(*outputs)[j], allocator) != RET_OK) {
      return RET_ERROR;
    }
  }
  return RET_OK;
}

void NNIEManager::SetInputNum(int max_input_num) { nnie_cfg_.cfg_.max_input_num_ = max_input_num; }

int NNIEManager::Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs) {
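SetBlobAddr() above is a three-way decision. A simplified sketch of the same logic (one function, logging removed; the real code also tracks blobs_/tensors_ so Release() can free them, and it skips SetData for non-MMZ user buffers):

  int BindBlob(SVP_SRC_BLOB_S *blob, HI_U64 user_virt, mindspore::MSTensor *t,
               std::shared_ptr<Allocator> a) {
    if (user_virt != 0 && NnieGetVirMemInfo(user_virt, &blob->u64PhyAddr) == HI_SUCCESS) {
      blob->u64VirAddr = user_virt;  // user buffer is already MMZ: reuse it directly, true zero-copy
      return RET_OK;
    }
    // no buffer yet, or a non-MMZ buffer: allocate cached MMZ once for the blob
    if (NnieMemMallocCached(t->Name().c_str(), nullptr, reinterpret_cast<HI_U64 *>(&blob->u64PhyAddr),
                            reinterpret_cast<void **>(&blob->u64VirAddr), GetBlobSize(*blob)) != HI_SUCCESS) {
      return RET_ERROR;
    }
    t->SetAllocator(a);  // the no-op allocator keeps the runtime from freeing blob memory
    t->SetData(reinterpret_cast<void *>(blob->u64VirAddr));
    return RET_OK;
  }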
@@ -80,17 +188,33 @@ int NNIEManager::Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg
  return RET_OK;
}

void NNIEManager::Release() { NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_); }
void NNIEManager::Release(bool resize_flag) {
  for (auto &blob : blobs_) {
    NNIE_MEM_FREE(blob->u64PhyAddr, blob->u64VirAddr);
    blob->u64VirAddr = 0;
    blob->u64PhyAddr = 0;
  }
  blobs_.clear();
  if (resize_flag) {
    for (auto &tensor : tensors_) {
      tensor->SetData(nullptr);
      tensor->SetAllocator(nullptr);
    }
  }
  tensors_.clear();
  NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
}

int NNIEManager::GetOutputData(std::vector<mindspore::MSTensor> *outputs,
                               const std::vector<std::vector<int64_t>> &outputs_shape, bool run_box) {
  int i, j, output_size = outputs->size();
  int output_size = outputs->size();
  if (output_size != nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum) {
    LOGE("seg%d: %d output tensors are required, but there are %d outputs.", nnie_cfg_.run_idx_.seg_idx_ - 1,
         nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum, output_size);
    return RET_ERROR;
  }

  int i, j;
  if (run_box) {
    for (i = 0; i < output_size; i++) {
      auto input_data_type = (*outputs)[i].DataType();
@@ -164,6 +288,132 @@ int NNIEManager::FillRoiPooling(mindspore::MSTensor *input) {
  return RET_OK;
}

int NNIEManager::SetAllocatorTensor(mindspore::MSTensor *tensor, SVP_SRC_BLOB_S *blob,
                                    std::shared_ptr<Allocator> allocator) {
  int step;
  auto data_type = tensor->DataType();
  if (data_type == DataType::kNumberTypeFloat32) {
    step = sizeof(float);
  } else if ((data_type == DataType::kNumberTypeUInt8) || (data_type == DataType::kNumberTypeInt8)) {
    step = sizeof(unsigned char);
  } else {
    LOGE("Unsupported DataType!");
    return RET_ERROR;
  }
  LOGI("\ninput %s :%d * %d = %d <-> %d", tensor->Name().c_str(), step, blob->unShape.stWhc.u32Width,
       step * blob->unShape.stWhc.u32Width, blob->u32Stride);

  if (blob->unShape.stWhc.u32Width * step == blob->u32Stride) {
    if (((tensor->Data() == nullptr) || tensor->allocator() == allocator) && (blob->u64VirAddr != 0)) {
      tensor->SetAllocator(allocator);
      tensor->SetData(reinterpret_cast<void *>(blob->u64VirAddr));
      LOGI("\nSet input %s allocator!", tensor->Name().c_str());
    }
  }
  return RET_OK;
}

int NNIEManager::SetAllocatorInputs(std::vector<mindspore::MSTensor> *inputs, bool run_box,
                                    std::shared_ptr<Allocator> allocator, unsigned int seg_id) {
  size_t i, j, input_size = inputs->size();
  if (seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
    LOGE("seg num err!");
    return RET_ERROR;
  }

  if (!run_box) {
    if ((input_size < kNumInput2) || (input_size - 1) != nnie_cfg_.param_.model_->astSeg[seg_id].u16SrcNum) {
      LOGE("Input Size Err!");
      return RET_ERROR;
    }
  }

  for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16SrcNum; i++) {
    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].src_node_[i]) {
      continue;
    }
    j = GetFillIndex(*inputs, input_size - 1, nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
    if (j == (input_size - 1)) {
      if (run_box && (*inputs)[i].Name() == "proposal") {
        continue;
      } else {
        j = i;
        LOGI("input tensor name(%s) can't match wk node name(%s).", (*inputs)[i].Name().c_str(),
             nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
      }
    }
    SVP_SRC_BLOB_S *blob = &nnie_cfg_.param_.seg_data_[seg_id].src_[i];
    SVP_BLOB_TYPE_E src_type = blob->enType;

    if (src_type != SVP_BLOB_TYPE_SEQ_S32) {
      SetAllocatorTensor(&(*inputs)[j], blob, allocator);
    }
  }
  return RET_OK;
}

int NNIEManager::SetAllocatorOutputs(std::vector<mindspore::MSTensor> *outputs, bool run_box,
                                     std::shared_ptr<Allocator> allocator, unsigned int seg_id) {
  size_t i, j;
  size_t output_size = outputs->size();
  if (output_size != nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum) {
    LOGE("seg%d: %d output tensors are required.", seg_id, nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum);
    return RET_ERROR;
  }

  for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16DstNum; i++) {
    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].dst_node_[i]) {
      continue;
    }

    j = GetFillIndex(*outputs, output_size, nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
    if (j == output_size) {
      j = i;
      LOGI("output tensor name(%s) can't match wk node name(%s).", (*outputs)[j].Name().c_str(),
           nnie_cfg_.param_.model_->astSeg[seg_id].astDstNode[i].szName);
    }

    auto output_data_type = (*outputs)[j].DataType();
    if (output_data_type == DataType::kNumberTypeFloat32) {
      SVP_SRC_BLOB_S *blob = &nnie_cfg_.param_.seg_data_[seg_id].dst_[i];
      if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
        LOGE("Nnie output type error");
        return RET_ERROR;
      } else if (SVP_BLOB_TYPE_SEQ_S32 != blob->enType) {
        if ((blob->unShape.stWhc.u32Width * sizeof(float) == blob->u32Stride)) {
          if ((((*outputs)[j].Data() == nullptr) || (*outputs)[j].allocator() == allocator) &&
              (blob->u64VirAddr != 0)) {
            (*outputs)[j].SetAllocator(allocator);
            (*outputs)[j].SetData(reinterpret_cast<void *>(blob->u64VirAddr));
            LOGI("\nSet output %s allocator!", (*outputs)[j].Name().c_str());
          }
        }
      }
    } else {
      LOGE("Unsupported DataType!");
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int NNIEManager::SetAllocator(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                              std::shared_ptr<Allocator> allocator, unsigned int seg_id) {
  bool run_box = false;
  if (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI) {
    run_box = true;
  }
  if (SetAllocatorInputs(inputs, run_box, allocator, seg_id) != RET_OK) {
    LOGE("SetAllocatorInputs failed!");
    return RET_ERROR;
  }
  if (SetAllocatorOutputs(outputs, run_box, allocator, seg_id) != RET_OK) {
    LOGE("SetAllocatorOutputs failed!");
    return RET_ERROR;
  }
  return RET_OK;
}

int NNIEManager::FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id) {
  bool run_box = false;
  size_t i, j;
@@ -17,17 +17,33 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
#include <vector>
#include <string>
#include <map>
#include <memory>
#include "include/errorcode.h"
#include "include/api/types.h"
#include "include/api/allocator.h"
#include "src/nnie_common.h"
#include "src/nnie_cfg_parser.h"

namespace mindspore {
namespace nnie {
class NNIEManager {
 public:
  static NNIEManager *GetInstance() {
    static NNIEManager manager;
    return &manager;
  static NNIEManager *GetInstance(const void *model_buf) {
    static std::map<const void *, NNIEManager *> managers_;
    auto iter = managers_.find(model_buf);
    if (iter != managers_.end()) {
      return iter->second;
    } else {
      auto manager = new (std::nothrow) NNIEManager();
      if (manager == nullptr) {
        return manager;
      } else {
        managers_[model_buf] = manager;
        return manager;
      }
    }
  }

  NNIEManager() {}
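GetInstance() now keys one NNIEManager per model buffer instead of being a process-wide singleton, so two models loaded in the same process no longer share WK state. Usage follows CustomCreateKernel earlier in this diff:

  auto *manager = mindspore::nnie::NNIEManager::GetInstance(model_buf);  // model_buf: pointer to the wk model data
  if (manager == nullptr) { /* allocation failed */ }

One consequence worth noting: in the code shown here, entries in the static map are never erased, so each manager lives until process exit.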
@@ -36,26 +52,65 @@ class NNIEManager {

  int Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);

  int CfgInit(int max_roi_num, int step, const std::vector<int> &core_id);
  int CfgInit(const Flags &flags, int max_seg_id);

  void SetInputNum(int max_input_num);

  int SetAllocatorInputs(std::vector<mindspore::MSTensor> *inputs, bool run_box, std::shared_ptr<Allocator> allocator,
                         unsigned int seg_id);

  int SetAllocatorOutputs(std::vector<mindspore::MSTensor> *outputs, bool run_box, std::shared_ptr<Allocator> allocator,
                          unsigned int seg_id);

  int SetAllocator(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                   std::shared_ptr<Allocator> allocator, unsigned int seg_id);

  int FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id);

  int Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg_id,
          const std::vector<std::vector<int64_t>> &outputs_shape);

  void Release();
  void Release(bool resize_flag);

  int LoadInputs(std::vector<mindspore::MSTensor> *inputs, std::shared_ptr<Allocator> allocator);

  int LoadOutputs(std::vector<mindspore::MSTensor> *outputs, std::shared_ptr<Allocator> allocator);

  int SetBlobAddr(SVP_SRC_BLOB_S *blob, HI_U64 virt, mindspore::MSTensor *tensor, std::shared_ptr<Allocator> allocator);

  void SetMaxSegId(int max_id) {
    if (max_id > max_seg_id_) {
      max_seg_id_ = max_id;
    }
  }

  inline int GetMaxSegId() { return max_seg_id_; }

  inline Flags *GetFlags() { return &flags_; }

  inline bool GetLoadModel() { return load_model_; }

  void SetLoadModel(bool flag) { load_model_ = flag; }

 private:
  int SetAllocatorTensor(mindspore::MSTensor *tensor, SVP_SRC_BLOB_S *blob, std::shared_ptr<Allocator> allocator);

  int GetOutputData(std::vector<mindspore::MSTensor> *outputs, const std::vector<std::vector<int64_t>> &outputs_shape,
                    bool run_box = false);

  int MallocBlobData(SVP_SRC_BLOB_S *blob, mindspore::MSTensor *tensor, HI_U32 blob_size);

  int FillRoiPooling(mindspore::MSTensor *input);
  char *wk_model_ = nullptr;

  int model_size_ = 0;

  NnieRunCfg nnie_cfg_;
  int max_seg_id_ = 0;
  Flags flags_;
  bool load_model_ = false;
  std::vector<SVP_SRC_BLOB_S *> blobs_;
  std::vector<mindspore::MSTensor *> tensors_;
};
}  // namespace nnie
}  // namespace mindspore
@@ -16,6 +16,7 @@
#include "src/nnie_memory.h"
#include "include/hi_common.h"
#include "include/mpi_sys.h"
#include "src/nnie_common.h"

namespace mindspore {
namespace nnie {
@@ -31,5 +32,14 @@ HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_add
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size) {
  return HI_MPI_SYS_MmzFlushCache(phy_addr, pv_vir_addr, size);
}

HI_S32 NnieGetVirMemInfo(HI_U64 pv_vir_addr, HI_U64 *phy_addr) {
  SYS_VIRMEM_INFO_S mem_info;
  HI_S32 ret = HI_MPI_SYS_GetVirMemInfo(NNIE_CONVERT_64BIT_ADDR(HI_VOID, pv_vir_addr), &mem_info);
  if (ret == HI_SUCCESS) {
    *phy_addr = mem_info.u64PhyAddr;
  }
  return ret;
}
}  // namespace nnie
}  // namespace mindspore
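NnieGetVirMemInfo() is the probe that makes zero-copy conditional: it asks the MPP runtime whether a user virtual address is MMZ-backed. A typical call site, mirroring SetBlobAddr above (`user_ptr` is a hypothetical tensor data pointer):

  HI_U64 phy = 0;
  HI_U64 virt = (HI_U64)(HI_UL)user_ptr;
  if (NnieGetVirMemInfo(virt, &phy) == HI_SUCCESS) {
    // user_ptr lies in media memory; NNIE can DMA from it without an extra copy
  }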
@@ -43,6 +43,8 @@ HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_
HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);

HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size);

HI_S32 NnieGetVirMemInfo(HI_U64 pv_vir_addr, HI_U64 *phy_addr);
}  // namespace nnie
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
@@ -34,15 +34,14 @@ struct Context::Data {

#ifdef PARALLEL_INFERENCE
  int32_t thread_num = 8;
  bool enable_parallel_ = false;
  int affinity_mode_ = 1;
  int32_t inter_op_parallel_num_ = 4;
#else
  int32_t thread_num = 2;
  bool enable_parallel_ = false;
  int affinity_mode_ = 0;
  int32_t inter_op_parallel_num_ = 1;
#endif
  bool enable_parallel_ = false;
  std::vector<int32_t> affinity_core_list_;
  std::shared_ptr<Delegate> delegate = nullptr;
  bool float_mode = false;
@@ -245,6 +245,14 @@ Status Model::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor>
  return impl_->Predict(inputs, outputs, before, after);
}

Status Model::Predict(const MSKernelCallBack &before, const MSKernelCallBack &after) {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Model implement is null.";
    return kLiteNullptr;
  }
  return impl_->Predict(before, after);
}

Status Model::PredictWithPreprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs,
                                    const MSKernelCallBack &before, const MSKernelCallBack &after) {
  MS_LOG(ERROR) << "Unsupported Feature.";
@@ -341,6 +341,32 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTen
  return kSuccess;
}

Status ModelImpl::Predict(const MSKernelCallBack &before, const MSKernelCallBack &after) {
  if (session_ == nullptr) {
    MS_LOG(ERROR) << "Run graph failed.";
    return kLiteError;
  }
  auto input_tensors = session_->GetInputs();
  if (input_tensors.empty()) {
    MS_LOG(ERROR) << "Failed to get input tensor.";
    return kLiteError;
  }

  for (auto &input : input_tensors) {
    if (input->data() == nullptr) {
      MS_LOG(ERROR) << "Tensor " << input->tensor_name() << " has no data.";
      return kLiteInputTensorError;
    }
  }
  auto ret = RunGraph(before, after);
  if (ret != kSuccess) {
    MS_LOG(ERROR) << "Run graph failed : " << ret;
    return ret;
  }
  MS_LOG(DEBUG) << "Run graph success.";
  return kSuccess;
}

std::vector<MSTensor> ModelImpl::GetInputs() {
  std::vector<MSTensor> empty;
  if (session_ == nullptr) {
@@ -72,6 +72,8 @@ class ModelImpl {
  Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs, const MSKernelCallBack &before,
                 const MSKernelCallBack &after);

  Status Predict(const MSKernelCallBack &before, const MSKernelCallBack &after);

  lite::LiteSession *CreateLiteSession(lite::InnerContext *context);

  Status LoadConfig(const std::string &config_path);
@@ -51,6 +51,14 @@ function Run_Hi3516() {
        else
            run_result='hi3516: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
        fi

        echo './benchmark --modelFile='${basepath}'/'${model_name}'.ms --inputShapes='${input_shapes}' --warmUpLoopCount=0 --loopCount=2 --configFile='${NNIE_CONFIG_FILE} >> "${run_hi3516_log_file}"
        ./benchmark --modelFile=${basepath}/${model_name}.ms --inputShapes=${input_shapes} --warmUpLoopCount=0 --loopCount=2 --configFile=${NNIE_CONFIG_FILE} >> "${run_hi3516_log_file}"
        if [ $? = 0 ]; then
            run_result='hi3516: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
        else
            run_result='hi3516: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
        fi
    done < ${models_nnie_config}
}
@@ -97,6 +105,7 @@ else
    echo "Run benchmark failed"
    MS_PRINT_TESTCASE_END_MSG
    cat ${run_benchmark_result_file}
    cat ${run_hi3516_log_file}
    MS_PRINT_TESTCASE_END_MSG
    rm -rf ${basepath}/*.ms
    rm -rf ${basepath}/libmslite_nnie.so