!31174 [MS][LITE][STABLE]optimize code | sync from master

Merge pull request !31174 from chenjianping/r1.6_dev
This commit is contained in:
i-robot 2022-03-14 03:28:29 +00:00 committed by Gitee
commit f188616162
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
18 changed files with 140 additions and 105 deletions

View File

@ -194,6 +194,14 @@ int ActorMgr::EnqueueMessage(const mindspore::ActorReference actor, std::unique_
int ActorMgr::Send(const AID &to, std::unique_ptr<MessageBase> msg, bool remoteLink, bool isExactNotRemote) {
// The destination is local
#ifdef BUILD_LITE
auto actor = GetActor(to);
if (actor != nullptr) {
return EnqueueMessage(actor, std::move(msg));
} else {
return ACTOR_NOT_FIND;
}
#else
if (IsLocalAddres(to)) {
auto actor = GetActor(to);
if (actor != nullptr) {
@ -223,6 +231,7 @@ int ActorMgr::Send(const AID &to, std::unique_ptr<MessageBase> msg, bool remoteL
return IO_NOT_FIND;
}
}
#endif
}
AID ActorMgr::Spawn(const ActorReference &actor, bool shareThread) {
@ -235,7 +244,7 @@ AID ActorMgr::Spawn(const ActorReference &actor, bool shareThread) {
MS_LOG(DEBUG) << "ACTOR was spawned,a=" << actor->GetAID().Name().c_str();
if (shareThread) {
auto mailbox = std::unique_ptr<MailBox>(new (std::nothrow) NonblockingMailBox());
auto mailbox = std::make_unique<NonblockingMailBox>();
auto hook = std::unique_ptr<std::function<void()>>(
new std::function<void()>([actor]() { ActorMgr::GetActorMgrRef()->SetActorReady(actor); }));
// the mailbox has this hook, the hook holds the actor reference, the actor has the mailbox. this is a cycle which

View File

@ -33,7 +33,7 @@ std::list<std::unique_ptr<MessageBase>> *BlockingMailBox::GetMsgs() {
while (enqueMailBox->empty()) {
cond.wait(ulk, [this] { return !this->enqueMailBox->empty(); });
}
SwapMailBox(&enqueMailBox, &dequeMailBox);
enqueMailBox->swap(*dequeMailBox);
ret = dequeMailBox;
}
return ret;
@ -62,7 +62,7 @@ std::list<std::unique_ptr<MessageBase>> *NonblockingMailBox::GetMsgs() {
released_ = true;
return nullptr;
}
SwapMailBox(&enqueMailBox, &dequeMailBox);
dequeMailBox->swap(*enqueMailBox);
ret = dequeMailBox;
released_ = false;
}

View File

@ -34,11 +34,6 @@ class MailBox {
virtual std::unique_ptr<MessageBase> GetMsg() = 0;
inline void SetNotifyHook(std::unique_ptr<std::function<void()>> &&hook) { notifyHook = std::move(hook); }
inline bool TakeAllMsgsEachTime() { return takeAllMsgsEachTime; }
// Exchange the two mailbox-list slots in place: after the call, *box1 points
// at the list formerly referenced by *box2 and vice versa. Only the pointers
// are swapped; no messages are copied or moved.
void SwapMailBox(std::list<std::unique_ptr<MessageBase>> **box1, std::list<std::unique_ptr<MessageBase>> **box2) {
  auto *former_first = *box1;
  *box1 = *box2;
  *box2 = former_first;
}
protected:
// if this flag is true, GetMsgs() should be invoked to take all enqueued msgs each time, otherwise we can only get

View File

@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.12)
project(Lite)
set(BUILD_LITE "on")
add_compile_definitions(BUILD_LITE)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/secure_option.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_link_option.cmake)

View File

@ -21,7 +21,13 @@
namespace mindspore {
namespace lite {
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#ifndef MS_UNLIKELY
#if defined(__x86_64__) || defined(__amd64__) || defined(_M_IX86) || defined(_M_X64)
#define MS_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define MS_UNLIKELY(x) x
#endif
#endif
enum NCHW_SHAPE { NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
enum NHWC_SHAPE { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 };

View File

@ -41,7 +41,7 @@ int GetCoreNum() {
}
void SetNumaBindStrategy(std::vector<std::vector<int>> *all_model_bind_list, int thread_num, int node_id) {
if (UNLIKELY(thread_num == 0)) {
if (MS_UNLIKELY(thread_num == 0)) {
MS_LOG(ERROR) << "thread num is zero.";
return;
}

View File

@ -360,18 +360,20 @@ ThreadPool *InnerContext::thread_pool() const { return thread_pool_; }
bool InnerContext::device_and_pkg_support_fp16() const { return this->device_and_pkg_support_fp16_; }
std::set<void *> InnerContext::GetLinkInfo(void *pre) const {
if (link_info_.find(pre) == link_info_.end()) {
auto iter = link_info_.find(pre);
if (iter == link_info_.end()) {
MS_LOG(DEBUG) << "Not found precursor in link information.";
return {};
}
return link_info_.at(pre);
return iter->second;
}
std::unordered_map<void *, std::set<void *>> InnerContext::GetAllLinkInfo() const { return link_info_; }
void InnerContext::SetLinkInfo(void *pre, void *suc) {
if (link_info_.find(pre) != link_info_.end()) {
link_info_.at(pre).insert(suc);
auto iter = link_info_.find(pre);
if (iter != link_info_.end()) {
iter->second.insert(suc);
return;
}
std::set<void *> suc_set{suc};
@ -385,9 +387,10 @@ void InnerContext::SetAllLinkInfo(const std::unordered_map<void *, std::set<void
void InnerContext::ReplaceLinkInfoReceiverWithNewOne(void *new_receiver, void *old_receiver) {
for (auto &info : link_info_) {
auto &receivers = info.second;
if (receivers.find(old_receiver) != receivers.end()) {
auto iter = receivers.find(old_receiver);
if (iter != receivers.end()) {
receivers.erase(iter);
receivers.insert(new_receiver);
receivers.erase(old_receiver);
}
}
}

View File

@ -20,6 +20,7 @@
#include "mindrt/include/mindrt.hpp"
#include "src/lite_kernel_util.h"
#include "src/common/tensor_util.h"
#include "src/common/common.h"
#include "src/runtime/inner_allocator.h"
#include "src/runtime/kernel/arm/base/partial_fusion.h"
#ifndef CONTROLFLOW_TENSORLIST_CLIP
@ -39,14 +40,14 @@ void LiteOpActor::RunOpData(OpData<lite::Tensor> *inputs, OpContext<lite::Tensor
InitInputData();
auto ret = RunKernel(*(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_before_)),
*(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_after_)));
auto ret = kernel_->Execute(*(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_before_)),
*(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_after_)));
input_op_datas_.erase(op_uuid);
if (ret != RET_OK) {
input_op_datas_.erase(op_uuid);
MS_LOG(ERROR) << "run kernel failed, name: " << kernel_->name();
context->SetFailed(ret);
return;
}
input_op_datas_.erase(op_uuid);
AsyncOutput(context);
SetOutputData(context);
return;
@ -89,7 +90,7 @@ int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *act
isolate_input_map_ = input_map;
std::vector<kernel::LiteKernel *> kernels{};
std::transform(actors->begin(), actors->end(), std::back_inserter(kernels),
[](std::shared_ptr<LiteOpActor> actor) { return actor->kernel_; });
[](const std::shared_ptr<LiteOpActor> &actor) { return actor->kernel_; });
size_t in_tensor_size = kernel_->in_tensors().size();
for (size_t i = 0; i < in_tensor_size; i++) {
Tensor *old_tensor = kernel_->in_tensors()[i];
@ -112,7 +113,8 @@ int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *act
}
TypeId new_data_type = GetSubgraphInTensorDataType(kernel_, old_tensor);
Tensor *new_tensor = new Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category());
Tensor *new_tensor =
new (std::nothrow) Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category());
if (new_tensor == nullptr) {
MS_LOG(ERROR) << "new Tensor failed.";
return RET_NULL_PTR;
@ -235,38 +237,41 @@ int LiteOpActor::UpdateActorOutput() {
}
#endif
bool LiteOpActor::ArrowHasCompiled(const AID &actor_name, const size_t &to_index,
bool LiteOpActor::ArrowHasCompiled(const AID &actor_name, size_t to_index,
const std::unordered_map<AID, std::set<size_t>> &receiver_index_set) {
if (receiver_index_set.find(actor_name) != receiver_index_set.end()) {
return receiver_index_set.at(actor_name).find(to_index) != receiver_index_set.at(actor_name).end();
auto iter = receiver_index_set.find(actor_name);
if (iter != receiver_index_set.end()) {
return iter->second.find(to_index) != iter->second.end();
}
return false;
}
void LiteOpActor::MarkArrowAsCompiled(const AID *actor_name, const size_t *to_index,
void LiteOpActor::MarkArrowAsCompiled(const AID *actor_name, size_t to_index,
std::unordered_map<AID, std::set<size_t>> *receiver_index_set) {
if (receiver_index_set->find(*actor_name) == receiver_index_set->end()) {
std::set<size_t> tmp{*to_index};
std::set<size_t> tmp{to_index};
receiver_index_set->insert(std::pair<AID, std::set<size_t>>(*actor_name, tmp));
} else {
receiver_index_set->at(*actor_name).insert(*to_index);
receiver_index_set->at(*actor_name).insert(to_index);
}
}
int LiteOpActor::CreateCommonArrow(const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map,
const std::set<void *> &receiver_tensors, const size_t &output_index,
std::unordered_map<AID, std::set<size_t>> *receiver_index_set) {
std::unordered_map<void *, std::set<std::pair<AID, size_t>>>::const_iterator iter;
for (auto receiver_tensor : receiver_tensors) {
if (receivers_map.find(receiver_tensor) == receivers_map.end()) {
iter = receivers_map.find(receiver_tensor);
if (iter == receivers_map.end()) {
MS_LOG(DEBUG) << "not a useful receiver.";
continue;
}
auto receiver_set = receivers_map.at(receiver_tensor);
auto receiver_set = iter->second;
for (auto item : receiver_set) {
if (ArrowHasCompiled(item.first, item.second, *receiver_index_set)) {
continue;
}
MarkArrowAsCompiled(&(item.first), &(item.second), receiver_index_set);
MarkArrowAsCompiled(&(item.first), item.second, receiver_index_set);
auto arrow = std::make_shared<DataArrow>(output_index, item.first, item.second);
MS_CHECK_TRUE_MSG(arrow != nullptr, RET_ERROR, "create arrow failed.");
output_data_arrows_.push_back(arrow);
@ -352,8 +357,9 @@ void LiteOpActor::InitInputData() {
}
void LiteOpActor::AsyncOutput(OpContext<Tensor> *context) {
for (size_t i = 0; i < output_data_arrows_.size(); i++) {
auto data = outputs_data_.at(i);
auto output_size = output_data_arrows_.size();
for (size_t i = 0; i < output_size; ++i) {
auto data = outputs_data_[i];
Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor<Tensor>::RunOpData, data.get(), context);
}
}
@ -372,11 +378,11 @@ int LiteOpActor::PrepareOutputData() {
auto &arrow = output_data_arrows_[i];
auto data = std::make_shared<OpData<Tensor>>(this->GetAID(), (kernel_->out_tensors()).at(arrow->from_output_index_),
static_cast<int>(arrow->to_input_index_));
if (data == nullptr) {
if (MS_UNLIKELY(data == nullptr)) {
MS_LOG(ERROR) << "new output_data failed.";
return RET_NULL_PTR;
}
outputs_data_.at(i) = data;
outputs_data_[i] = data;
}
return RET_OK;
}
@ -389,6 +395,7 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
MS_LOG(ERROR) << "thread pool is nullptr";
return actors;
}
actors.reserve(kernels.size());
for (auto &kernel : kernels) {
/* make subgraph name (actor name) unique */
kernel->set_name(kernel->name() + "_" + to_string(actor_count++));

View File

@ -52,7 +52,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
}
void RunOpData(OpData<lite::Tensor> *input_data, OpContext<lite::Tensor> *context = nullptr) override;
virtual int CompileArrow(const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map);
int RunKernel(const KernelCallBack &before, const KernelCallBack &after) {
int RunKernel(KernelCallBack before, KernelCallBack after) {
auto ret = kernel_->Execute(before, after);
if (RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << kernel_->name();
@ -107,9 +107,9 @@ class LiteOpActor : public OpActor<lite::Tensor> {
const std::set<void *> &receiver_tensors, const size_t &output_index,
std::unordered_map<AID, std::set<size_t>> *receiver_index_set);
int CreateEmptyArrow(const size_t &output_index);
bool ArrowHasCompiled(const AID &actor_name, const size_t &to_index,
bool ArrowHasCompiled(const AID &actor_name, size_t to_index,
const std::unordered_map<AID, std::set<size_t>> &receiver_index_set);
void MarkArrowAsCompiled(const AID *actor_name, const size_t *to_index,
void MarkArrowAsCompiled(const AID *actor_name, size_t to_index,
std::unordered_map<AID, std::set<size_t>> *receiver_index_set);
private:

View File

@ -564,9 +564,9 @@ int LiteSession::IsolateOutputTensor() {
if (src_tensor->IsGraphInput()) {
continue;
}
Tensor *new_tensor =
new Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Category::GRAPH_OUTPUT);
if (new_tensor == nullptr) {
Tensor *new_tensor = new (std::nothrow)
Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Category::GRAPH_OUTPUT);
if (MS_UNLIKELY(new_tensor == nullptr)) {
MS_LOG(ERROR) << "duplicate new output failed.";
return RET_NULL_PTR;
}
@ -590,12 +590,14 @@ int LiteSession::IsolateOutputTensor() {
/* set new tensor for calculate */
for (auto subgraph : kernels_) {
/* subgraph input and output */
for (size_t i = 0; i < subgraph->in_tensors().size(); i++) {
auto in_size = subgraph->in_tensors().size();
for (size_t i = 0; i < in_size; ++i) {
if (subgraph->in_tensors()[i] == src_tensor) {
subgraph->set_in_tensor(new_tensor, i);
}
}
for (size_t i = 0; i < subgraph->out_tensors().size(); i++) {
auto out_size = subgraph->out_tensors().size();
for (size_t i = 0; i < out_size; ++i) {
if (subgraph->out_tensors()[i] == src_tensor) {
subgraph->set_out_tensor(new_tensor, i);
}
@ -607,14 +609,18 @@ int LiteSession::IsolateOutputTensor() {
#endif
/* node input and output */
auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
for (size_t i = 0; i < nodes.size(); i++) {
auto nodes_size = nodes.size();
for (size_t i = 0; i < nodes_size; ++i) {
auto node = nodes[i];
for (size_t j = 0; j < node->out_tensors().size(); j++) {
out_size = node->out_tensors().size();
for (size_t j = 0; j < out_size; ++j) {
if (node->out_tensors()[j] == src_tensor) {
node->set_out_tensor(new_tensor, j);
break;
}
}
for (size_t j = 0; j < node->in_tensors().size(); j++) {
in_size = node->in_tensors().size();
for (size_t j = 0; j < in_size; ++j) {
if (node->in_tensors()[j] == src_tensor) {
node->set_in_tensor(new_tensor, j);
}
@ -906,11 +912,7 @@ int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &af
return ret;
}
MS_ASSERT(this->context_ != nullptr);
if (before == nullptr && after == nullptr) {
ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_);
} else {
ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
}
ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
if (ret != RET_OK) {
MS_LOG(ERROR) << "RunGraph failed : " << ret;
}
@ -1236,7 +1238,7 @@ void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
}
int LiteSession::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels,
const std::unordered_map<Tensor *, Tensor *> isolate_input_map) {
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map) {
for (auto kernel : kernels) {
if (kernel == nullptr) {
MS_LOG(ERROR) << "input kernel is nullptr!";

View File

@ -117,7 +117,7 @@ class LiteSession : public session::LiteSession {
#endif
static int ReSizeKernels(
const std::vector<kernel::LiteKernel *> &kernels,
const std::unordered_map<Tensor *, Tensor *> isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
#ifdef SERVER_INFERENCE
int IniPackWeightData(Model *model);

View File

@ -18,6 +18,7 @@
#include <memory>
#include "src/lite_mindrt.h"
#include "include/errorcode.h"
#include "src/common/common.h"
#include "src/common/tensor_util.h"
#ifdef ENABLE_FP16
#include "nnacl/base/cast_base.h"
@ -27,7 +28,8 @@
namespace mindspore::lite {
int MindrtExecutor::PrepareGraphInput(const std::vector<kernel::LiteKernel *> &kernels,
const std::vector<Tensor *> &inputs) {
for (size_t j = 0; j < kernels.size(); ++j) {
auto kernels_size = kernels.size();
for (size_t j = 0; j < kernels_size; ++j) {
auto in_tensor_size = kernels[j]->in_tensors().size();
for (size_t k = 0; k < in_tensor_size; ++k) {
auto tensor = kernels[j]->in_tensors()[k];
@ -40,7 +42,7 @@ int MindrtExecutor::PrepareGraphInput(const std::vector<kernel::LiteKernel *> &k
return RET_ERROR;
}
auto data = std::make_shared<OpData<Tensor>>(op_actors_[j]->GetAID(), inputs.at(idx), static_cast<int>(k));
if (data == nullptr) {
if (MS_UNLIKELY(data == nullptr)) {
MS_LOG(ERROR) << "new opdata failed.";
return RET_NULL_PTR;
}
@ -52,7 +54,8 @@ int MindrtExecutor::PrepareGraphInput(const std::vector<kernel::LiteKernel *> &k
int MindrtExecutor::PrepareGraphOutput(const std::vector<kernel::LiteKernel *> &kernels,
const std::vector<Tensor *> &outputs) {
for (size_t i = 0; i < outputs.size(); ++i) {
auto outputs_size = outputs.size();
for (size_t i = 0; i < outputs_size; ++i) {
Tensor *graph_output_tensor = outputs[i];
if (graph_output_tensor->IsGraphInput()) {
continue;
@ -66,8 +69,8 @@ int MindrtExecutor::PrepareGraphOutput(const std::vector<kernel::LiteKernel *> &
});
MS_ASSERT(current_output_map != isolate_output_map_->end());
Tensor *subgraph_output_tensor = current_output_map->first;
for (size_t j = 0; j < kernels.size(); ++j) {
auto kernels_size = kernels.size();
for (size_t j = 0; j < kernels_size; ++j) {
auto out_tensor_size = kernels[j]->out_tensors().size();
for (size_t k = 0; k < out_tensor_size; ++k) {
if (subgraph_output_tensor != kernels[j]->out_tensors()[k]) {
@ -75,7 +78,7 @@ int MindrtExecutor::PrepareGraphOutput(const std::vector<kernel::LiteKernel *> &
}
auto data =
std::make_shared<OpData<Tensor>>(op_actors_[j]->GetAID(), subgraph_output_tensor, static_cast<int>(k));
if (data == nullptr) {
if (MS_UNLIKELY(data == nullptr)) {
MS_LOG(ERROR) << "new opdata failed.";
return RET_NULL_PTR;
}
@ -114,8 +117,9 @@ std::unordered_map<void *, std::set<std::pair<AID, size_t>>> MindrtExecutor::Bui
for (size_t i = 0; i < input_tensors.size(); ++i) {
auto key = input_tensors[i];
auto pair = std::make_pair(op_actor->GetAID(), i);
if (receivers_map.find(key) != receivers_map.end()) {
receivers_map.at(key).insert(pair);
auto iter = receivers_map.find(key);
if (iter != receivers_map.end()) {
iter->second.emplace(pair);
} else {
std::set<std::pair<AID, size_t>> tmp_set{pair};
receivers_map[input_tensors[i]] = tmp_set;
@ -127,7 +131,7 @@ std::unordered_map<void *, std::set<std::pair<AID, size_t>>> MindrtExecutor::Bui
int MindrtExecutor::LinkActors() {
auto receivers_map = BuildReceiverMap();
for (auto op_actor : op_actors_) {
for (auto &&op_actor : op_actors_) {
auto ret = op_actor->CompileArrow(receivers_map);
if (ret != RET_OK) {
MS_LOG(ERROR) << "actor: " << op_actor->GetAID() << " compile arrow failed.";
@ -138,7 +142,7 @@ int MindrtExecutor::LinkActors() {
}
int MindrtExecutor::PostInitActors() {
for (auto actor : op_actors_) {
for (auto &&actor : op_actors_) {
auto ret = actor->PostInit();
if (ret != RET_OK) {
MS_LOG(ERROR) << "PrepareGraphOutput failed, actor aid: " << actor->GetAID();

View File

@ -18,7 +18,6 @@
#include "src/common/log_adapter.h"
#include "src/common/utils.h"
#include "src/common/common.h"
#include "src/runtime/numa_adapter.h"
using mindspore::numa::NUMAAdapter;
@ -49,13 +48,13 @@ void *MemOperator::Allocate(size_t rounded_size, int node_id, size_t *allocate_s
int64_t left = 0;
if (node_id >= 0) {
// allocate memory from numa node
MemoryInfo mem_info = NUMAAdapter::GetInstance()->GetNodeSize(node_id);
MemoryInfo mem_info = numa_instance_->GetNodeSize(node_id);
free_count = mem_info.free;
} else {
free_count = lite::GetFreeMemory();
}
if (UNLIKELY(static_cast<int64_t>(rounded_size) >= free_count)) {
if (MS_UNLIKELY(static_cast<int64_t>(rounded_size) >= free_count)) {
MS_LOG(ERROR) << "No enough memory left!node_id: " << node_id << ", request: " << rounded_size
<< ", free: " << free_count << ", least free request: " << least_free_memory_;
return nullptr;
@ -75,16 +74,16 @@ void *MemOperator::Allocate(size_t rounded_size, int node_id, size_t *allocate_s
data = _aligned_malloc(allocate_tmp_size, kMemAlginSize);
#else
if (node_id >= 0) {
data = NUMAAdapter::GetInstance()->Malloc(node_id, static_cast<size_t>(allocate_tmp_size));
data = numa_instance_->Malloc(node_id, static_cast<size_t>(allocate_tmp_size));
} else {
auto ret = posix_memalign(&data, kMemAlginSize, static_cast<size_t>(allocate_tmp_size));
if (UNLIKELY(ret != 0)) {
if (MS_UNLIKELY(ret != 0)) {
MS_LOG(ERROR) << "posix_memalign failed!ret: " << ret;
return nullptr;
}
}
#endif
if (UNLIKELY(data == nullptr)) {
if (MS_UNLIKELY(data == nullptr)) {
MS_LOG(ERROR) << "malloc data failed!";
return nullptr;
}
@ -147,7 +146,7 @@ void *MemOperator::Malloc(size_t size) {
// todo kAllocUnitSize can be replaced by config
size_t allocate_size;
void *data = Allocate(rounded_size, node_id_, &allocate_size);
if (UNLIKELY(data == nullptr)) {
if (MS_UNLIKELY(data == nullptr)) {
return nullptr;
}
all_datas_.emplace(data, allocate_size);
@ -169,7 +168,7 @@ void *MemOperator::Malloc(size_t size) {
// return memory to the memory pool
void MemOperator::Free(void *ptr) {
if (UNLIKELY(ptr == nullptr)) {
if (MS_UNLIKELY(ptr == nullptr)) {
return;
}
std::lock_guard<std::mutex> locker(mutex_);
@ -230,9 +229,10 @@ void MemOperator::EraseFreeBlock(const int64_t index) {
}
MemOperator::MemOperator(int node_id) {
if (node_id >= 0 && NUMAAdapter::GetInstance()->Available()) {
numa_instance_ = NUMAAdapter::GetInstance();
if (node_id >= 0 && numa_instance_->Available()) {
node_id_ = node_id;
auto mem_info = NUMAAdapter::GetInstance()->GetNodeSize(node_id_);
auto mem_info = numa_instance_->GetNodeSize(node_id_);
if (mem_info.total <= 0) {
return;
}
@ -247,7 +247,7 @@ MemOperator::MemOperator(int node_id) {
auto *block = GetBlock();
size_t allocate_size;
block->data_ = Allocate(kAllocUnitSize, node_id, &allocate_size);
if (UNLIKELY(block->data_ == nullptr)) {
if (MS_UNLIKELY(block->data_ == nullptr)) {
return;
}
all_datas_.emplace(block->data_, allocate_size);
@ -262,7 +262,7 @@ MemOperator::~MemOperator() {
_aligned_free(data.first);
#else
if (node_id_ >= 0) {
NUMAAdapter::GetInstance()->Free(data.first, data.second);
numa_instance_->Free(data.first, data.second);
} else {
free(data.first);
}
@ -328,7 +328,7 @@ std::shared_ptr<MemOperator> DynamicMemManager::GetMemOperator(const int node_id
iter = nodes_mem_.find(numa_node_id);
if (iter == nodes_mem_.end()) {
mem_oper = std::make_shared<MemOperator>(numa_node_id);
if (UNLIKELY(mem_oper == nullptr)) {
if (MS_UNLIKELY(mem_oper == nullptr)) {
MS_LOG(ERROR) << "make_shared MemOperator failed!";
return nullptr;
}

View File

@ -23,6 +23,7 @@
#include <map>
#include <unordered_map>
#include <deque>
#include "src/runtime/numa_adapter.h"
namespace mindspore {
struct Block {
@ -39,7 +40,7 @@ struct Block {
class MemOperator {
public:
explicit MemOperator(int node_id);
virtual ~MemOperator();
~MemOperator();
void *Malloc(size_t size);
void Free(void *ptr);
@ -62,6 +63,7 @@ class MemOperator {
// all data blocks
size_t block_count_ = 0;
int64_t garbage_block_;
std::shared_ptr<numa::NUMAAdapter> numa_instance_ = nullptr;
std::mutex mutex_;
std::vector<Block> blocks_;
// key: data size, value: Block index

View File

@ -23,6 +23,7 @@ namespace numa {
namespace {
static constexpr int kSuccess = 0;
static constexpr int kBitsPerByte = 8;
static constexpr auto kBitsPerMask = static_cast<int>(sizeof(uint64_t) * kBitsPerByte);
} // namespace
NUMAAdapter::NUMAAdapter() {
@ -34,7 +35,7 @@ NUMAAdapter::NUMAAdapter() {
}
numa_interfaces_.numa_available = reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_available"));
if (UNLIKELY(numa_interfaces_.numa_available == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_available == nullptr)) {
MS_LOG(ERROR) << "numa_available not found!";
}
if (numa_interfaces_.numa_available() < 0) {
@ -46,64 +47,64 @@ NUMAAdapter::NUMAAdapter() {
available_ = true;
numa_interfaces_.numa_num_configured_nodes =
reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_num_configured_nodes"));
if (UNLIKELY(numa_interfaces_.numa_num_configured_nodes == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_num_configured_nodes == nullptr)) {
MS_LOG(ERROR) << "numa_num_configured_nodes not found!";
available_ = false;
}
numa_interfaces_.numa_num_task_cpus = reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_num_task_cpus"));
if (UNLIKELY(numa_interfaces_.numa_num_task_cpus == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_num_task_cpus == nullptr)) {
MS_LOG(ERROR) << "numa_num_task_cpus not found!";
available_ = false;
}
numa_interfaces_.numa_node_to_cpus =
reinterpret_cast<int (*)(int node, struct bitmask *mask)>(dlsym(handle_, "numa_node_to_cpus"));
if (UNLIKELY(numa_interfaces_.numa_node_to_cpus == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_node_to_cpus == nullptr)) {
MS_LOG(ERROR) << "numa_node_to_cpus not found!";
available_ = false;
}
numa_interfaces_.numa_allocate_nodemask =
reinterpret_cast<struct bitmask *(*)(void)>(dlsym(handle_, "numa_allocate_nodemask"));
if (UNLIKELY(numa_interfaces_.numa_allocate_nodemask == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_allocate_nodemask == nullptr)) {
MS_LOG(ERROR) << "numa_allocate_nodemask not found!";
available_ = false;
}
numa_interfaces_.numa_bitmask_clearall =
reinterpret_cast<struct bitmask *(*)(struct bitmask *)>(dlsym(handle_, "numa_bitmask_clearall"));
if (UNLIKELY(numa_interfaces_.numa_bitmask_clearall == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_bitmask_clearall == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_clearall not found!";
available_ = false;
}
numa_interfaces_.numa_bitmask_setbit =
reinterpret_cast<struct bitmask *(*)(struct bitmask *, unsigned int)>(dlsym(handle_, "numa_bitmask_setbit"));
if (UNLIKELY(numa_interfaces_.numa_bitmask_setbit == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_bitmask_setbit == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_setbit not found!";
available_ = false;
}
numa_interfaces_.numa_bind = reinterpret_cast<void (*)(struct bitmask *)>(dlsym(handle_, "numa_bind"));
if (UNLIKELY(numa_interfaces_.numa_bind == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_bind == nullptr)) {
MS_LOG(ERROR) << "numa_bind not found!";
available_ = false;
}
numa_interfaces_.numa_bitmask_free =
reinterpret_cast<void (*)(struct bitmask *)>(dlsym(handle_, "numa_bitmask_free"));
if (UNLIKELY(numa_interfaces_.numa_bitmask_free == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_bitmask_free == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_free not found!";
available_ = false;
}
numa_interfaces_.numa_alloc_onnode =
reinterpret_cast<void *(*)(size_t size, int node)>(dlsym(handle_, "numa_alloc_onnode"));
if (UNLIKELY(numa_interfaces_.numa_alloc_onnode == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_alloc_onnode == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_free not found!";
available_ = false;
}
numa_interfaces_.numa_node_size64 =
reinterpret_cast<int64_t (*)(int node, int64_t *freep)>(dlsym(handle_, "numa_node_size64"));
if (UNLIKELY(numa_interfaces_.numa_node_size64 == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_node_size64 == nullptr)) {
MS_LOG(ERROR) << "numa_node_size64 not found!";
available_ = false;
}
numa_interfaces_.numa_free = reinterpret_cast<void (*)(void *start, size_t size)>(dlsym(handle_, "numa_free"));
if (UNLIKELY(numa_interfaces_.numa_free == nullptr)) {
if (MS_UNLIKELY(numa_interfaces_.numa_free == nullptr)) {
MS_LOG(ERROR) << "numa_free not found!";
available_ = false;
}
@ -119,7 +120,7 @@ void NUMAAdapter::Bind(int node_id) {
return;
}
auto bitmask = numa_interfaces_.numa_allocate_nodemask();
if (UNLIKELY(bitmask == nullptr)) {
if (MS_UNLIKELY(bitmask == nullptr)) {
MS_LOG(ERROR) << "bind numa_node " << node_id << " failed!";
return;
}
@ -172,7 +173,7 @@ std::vector<int> NUMAAdapter::GetCPUList(int node_id) {
return cpu_list;
}
int cpu_num = numa_interfaces_.numa_num_task_cpus();
if (UNLIKELY(cpu_num < 0)) {
if (MS_UNLIKELY(cpu_num < 0)) {
MS_LOG(ERROR) << "numa_num_task_cpus return " << cpu_num;
return cpu_list;
}
@ -180,12 +181,11 @@ std::vector<int> NUMAAdapter::GetCPUList(int node_id) {
int maskp_index = 0;
auto maskp = nodemask->maskp;
do {
if (UNLIKELY(maskp == nullptr)) {
if (MS_UNLIKELY(maskp == nullptr)) {
MS_LOG(ERROR) << "maskp is nullptr!";
break;
}
auto mask = *(maskp);
static constexpr auto kBitsPerMask = static_cast<int>(sizeof(decltype(mask)) * kBitsPerByte);
int step = static_cast<int>(maskp_index * kBitsPerMask);
for (int i = 0; i < kBitsPerMask; ++i) {
if (mask & 1) {
@ -219,6 +219,7 @@ NUMAAdapter::~NUMAAdapter() {
return;
}
(void)dlclose(handle_);
handle_ = nullptr;
}
} // namespace numa
} // namespace mindspore

View File

@ -16,12 +16,18 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
#include <numa.h>
#include <cstdint>
#include <cstddef>
#include <vector>
#include <memory>
namespace mindspore {
namespace numa {
// Local declaration of a CPU/node bitmask, used with the libnuma entry points
// resolved via dlsym (e.g. numa_allocate_nodemask / numa_node_to_cpus).
// NOTE(review): this presumably mirrors libnuma's own `struct bitmask` layout,
// since pointers to it are passed across the dlsym'd interface — confirm
// against the numa.h of the targeted libnuma version.
struct bitmask {
  uint64_t size;   // presumably the mask length in bits — TODO confirm
  uint64_t *maskp; // mask storage; iterated as 64-bit words by GetCPUList
};
struct NUMAInterface {
int (*numa_available)(void);
int (*numa_num_configured_nodes)(void);
@ -44,12 +50,13 @@ struct MemoryInfo {
class NUMAAdapter {
public:
static NUMAAdapter *GetInstance() {
static NUMAAdapter instance;
return &instance;
static std::shared_ptr<NUMAAdapter> GetInstance() {
static std::shared_ptr<NUMAAdapter> instance = std::make_shared<NUMAAdapter>();
return instance;
}
virtual ~NUMAAdapter();
NUMAAdapter();
~NUMAAdapter();
inline bool Available() const { return false; }
void Bind(int node_id);
void *Malloc(int node_id, size_t size);
@ -60,8 +67,6 @@ class NUMAAdapter {
MemoryInfo GetNodeSize(int node_id);
private:
NUMAAdapter();
void *handle_; // numa.so handle
bool available_ = false;
NUMAInterface numa_interfaces_;

View File

@ -203,7 +203,7 @@ int Scheduler::HandleBuildinCpuKernelWeight(const kernel::SubGraphType belong_su
return RET_OK;
}
int Scheduler::InitKernels(std::vector<kernel::LiteKernel *> dst_kernels) {
int Scheduler::InitKernels(std::vector<kernel::LiteKernel *> &&dst_kernels) {
if (is_train_session_) {
return RET_OK;
}
@ -422,7 +422,7 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
}
#endif
ret = InitKernels(*dst_kernels);
ret = InitKernels(std::move(*dst_kernels));
if (ret != RET_OK) {
MS_LOG(ERROR) << "InitKernels failed.";
return ret;
@ -1558,7 +1558,7 @@ kernel::LiteKernel *FindAllSubGraphKernels(const std::vector<kernel::LiteKernel
}
} // namespace
int Scheduler::ConstructNormalSubGraphs(const std::vector<kernel::LiteKernel *> src_kernel,
int Scheduler::ConstructNormalSubGraphs(const std::vector<kernel::LiteKernel *> &src_kernel,
std::vector<kernel::LiteKernel *> *dst_kernel,
std::map<const kernel::LiteKernel *, bool> *is_kernel_finish) {
if (src_kernel.empty()) {

View File

@ -93,7 +93,7 @@ class Scheduler {
int FindProviderKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, TypeId data_type, kernel::LiteKernel **kernel);
int InitKernels(std::vector<kernel::LiteKernel *> dst_kernels);
int InitKernels(std::vector<kernel::LiteKernel *> &&dst_kernels);
kernel::LiteKernel *SchedulePartialToKernel(const lite::Model::Node *src_node);
// schedule a partial node to a subgraph_kernel
std::vector<kernel::LiteKernel *> ScheduleSubGraphToSubGraphKernels(const int &subgraph_index);
@ -106,7 +106,7 @@ class Scheduler {
std::vector<lite::Tensor *> *in_tensors, std::vector<lite::Tensor *> *out_tensors,
TypeId prefer_data_type = kTypeUnknown);
// vector<LiteKernel/SubGraphKernel> --> vector<SubGraphKernel>
int ConstructNormalSubGraphs(const std::vector<kernel::LiteKernel *> src_kernel,
int ConstructNormalSubGraphs(const std::vector<kernel::LiteKernel *> &src_kernel,
std::vector<kernel::LiteKernel *> *dst_kernel,
std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);