!30550 【MS】【LITE】support NUMA

Merge pull request !30550 from chenjianping/master_dev
Authored by i-robot on 2022-02-25 06:16:00 +00:00, committed by Gitee
commit 87b47c76a9
19 changed files with 584 additions and 89 deletions

View File

@ -136,6 +136,7 @@ set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
)
endif()

View File

@ -21,6 +21,8 @@
namespace mindspore {
namespace lite {
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
enum NCHW_SHAPE { NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
enum NHWC_SHAPE { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 };
enum HWCK_SHAPE { HWCK_H = 0, HWCK_W = 1, HWCK_C = 2, HWCK_K = 3 };
@ -57,6 +59,11 @@ static const char *const kMSCacheModelPath = "cache_model_path";
static const char *const kMSCacheVocabSize = "vocab_size";
static const char *const kMSCacheDeviceSize = "device_cache_size";
static const char *const kMSCacheSerializePath = "serialize_path";
// config
#ifdef SERVER_INFERENCE
static const char *const kConfigServerInference = "server_inference";
static const char *const kConfigNUMANodeId = "numa_node_id";
#endif
} // namespace lite
} // namespace mindspore
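
For orientation, these two keys are consumed later in this diff: model_worker.cc writes the node id via UpdateConfig, and ModelImpl::CreateLiteSession reads it back to call SetNodeId. A minimal sketch of the producing side (node id illustrative):

// Sketch mirroring the UpdateConfig call in model_worker.cc below:
// request that this model's session allocate on NUMA node 0.
model->UpdateConfig(lite::kConfigServerInference,
                    {lite::kConfigNUMANodeId, std::to_string(0)});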

View File

@ -27,6 +27,9 @@
#include <sys/types.h>
#include <sys/param.h>
#endif
#ifdef SERVER_INFERENCE
#include <sys/sysinfo.h>
#endif
namespace mindspore {
namespace lite {
@ -178,5 +181,17 @@ size_t GetMaxMallocSize() {
#endif
return max_malloc_size;
}
#ifdef SERVER_INFERENCE
int64_t GetFreeMemory() {
struct sysinfo info;
auto ret = sysinfo(&info);
if (ret != 0) {
MS_LOG(ERROR) << "sysinfo failed!ret = " << ret;
return 0;
}
return static_cast<int64_t>(info.freeram);
}
#endif
} // namespace lite
} // namespace mindspore
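
One caveat: struct sysinfo expresses sizes in multiples of info.mem_unit bytes, so returning freeram raw undercounts on kernels where mem_unit != 1. A hedged variant (hypothetical helper, not part of this change):

int64_t GetFreeMemoryBytes() {
  struct sysinfo info;
  if (sysinfo(&info) != 0) {
    return 0;
  }
  // sysinfo sizes are counted in units of info.mem_unit bytes.
  return static_cast<int64_t>(info.freeram) * static_cast<int64_t>(info.mem_unit);
}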

View File

@ -43,6 +43,9 @@ uint64_t GetTimeUs();
bool IsSupportSDot();
size_t GetMaxMallocSize();
#ifdef SERVER_INFERENCE
int64_t GetFreeMemory();
#endif
#ifdef __ANDROID__
uint32_t getHwCap(int hwcap_type);

View File

@ -34,6 +34,9 @@
#include "src/lite_session.h"
#include "src/common/file_utils.h"
#include "src/common/config_file.h"
#ifdef SERVER_INFERENCE
#include "src/common/common.h"
#endif
namespace mindspore {
namespace {
@ -684,7 +687,15 @@ lite::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) {
delete context;
return nullptr;
}
#ifdef SERVER_INFERENCE
auto iter = config_info_.find(lite::kConfigServerInference);
if (iter != config_info_.end()) {
auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);
if (numa_iter != iter->second.end()) {
context->SetNodeId(std::atoi(numa_iter->second.c_str()));
}
}
#endif
session->InitExecutionConfig(&execution_plan_);
session->SetConfigInfo(&config_info_);

View File

@ -22,6 +22,9 @@
#include "src/runtime/inner_allocator.h"
#include "src/common//file_utils.h"
#include "src/pack_weight_manager.h"
#include "src/runtime/numa_adapter.h"
#include "src/common/common.h"
namespace mindspore {
namespace {
constexpr int32_t kNumThreads = 4;
@ -36,6 +39,32 @@ int GetCoreNum() {
#endif
return core_num;
}
void SetNumaBindStrategy(std::vector<std::vector<int>> *all_model_bind_list, int thread_num, int node_id) {
if (UNLIKELY(thread_num == 0)) {
MS_LOG(ERROR) << "thread num is zero.";
return;
}
std::vector<int> cpu_list = numa::NUMAAdapter::GetInstance()->GetCPUList(node_id);
auto cpu_num = cpu_list.size();
if (cpu_num == 0) {
return;
}
std::vector<int> bind_id;
bind_id.reserve(thread_num);
all_model_bind_list->reserve(cpu_num / thread_num + 1);
bind_id.emplace_back(cpu_list[0]);
for (size_t i = 1; i < cpu_num; ++i) {
if (i % thread_num == 0) {
all_model_bind_list->emplace_back(bind_id);
bind_id.clear();
}
bind_id.emplace_back(cpu_list[i]);
}
if (!bind_id.empty()) {
all_model_bind_list->emplace_back(bind_id);
}
}
} // namespace
void ModelPool::SetBindStrategy(std::vector<std::vector<int>> *all_model_bind_list, int thread_num) {
@ -112,14 +141,26 @@ ModelPoolContex ModelPool::CreateModelContext(const std::shared_ptr<RunnerConfig
MS_LOG(ERROR) << "context is nullptr.";
return {};
}
if (model_context->GetThreadNum() == 0) {
MS_LOG(ERROR) << "thread num is zero.";
if (model_context->GetThreadNum() < 1) {
MS_LOG(ERROR) << "Invalid thread num " << model_context->GetThreadNum();
return {};
}
int node_id = -1;
if (numa::NUMAAdapter::GetInstance()->Available()) {
node_id = 0;
num_models_ =
numa::NUMAAdapter::GetInstance()->GetCPUList(node_id).size() / static_cast<int>(model_context->GetThreadNum());
} else {
num_models_ = GetCoreNum() / static_cast<int>(model_context->GetThreadNum());
}
ModelPoolContex model_pool_context;
std::vector<std::vector<int>> all_model_bind_list;
if (model_context->GetThreadAffinityMode() == lite::HIGHER_CPU) {
SetBindStrategy(&all_model_bind_list, static_cast<int>(model_context->GetThreadNum()));
if (numa::NUMAAdapter::GetInstance()->Available()) {
SetNumaBindStrategy(&all_model_bind_list, static_cast<int>(model_context->GetThreadNum()), node_id);
} else {
SetBindStrategy(&all_model_bind_list, static_cast<int>(model_context->GetThreadNum()));
}
} else if (model_context->GetThreadAffinityMode() == lite::MID_CPU) {
MS_LOG(ERROR) << "not support bind MID_CPU.";
return {};
@ -171,6 +212,7 @@ Status ModelPool::Init(const std::string &model_path, const std::shared_ptr<Runn
MS_LOG(ERROR) << "CreateModelContext failed, context is empty.";
return kLiteError;
}
size_t size = 0;
graph_buf_ = lite::ReadFile(model_path.c_str(), &size);
if (graph_buf_ == nullptr) {
@ -178,10 +220,14 @@ Status ModelPool::Init(const std::string &model_path, const std::shared_ptr<Runn
return kLiteError;
}
lite::PackWeightManager::GetInstance()->InitWeightManagerByBuf(graph_buf_);
int node_id = -1;
if (numa::NUMAAdapter::GetInstance()->Available()) {
node_id = 0;
}
std::shared_ptr<ModelThread> model_thread = nullptr;
for (size_t i = 0; i < num_models_; i++) {
model_thread = std::make_shared<ModelThread>();
auto status = model_thread->Init(graph_buf_, size, model_pool_context[i], dec_key, dec_mode);
auto status = model_thread->Init(graph_buf_, size, model_pool_context[i], dec_key, dec_mode, node_id);
if (status != kSuccess) {
MS_LOG(ERROR) << " model thread init failed.";
return kLiteError;
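
For intuition, SetNumaBindStrategy above slices one NUMA node's CPU list into groups of thread_num, one group per model worker. A hedged example (CPU ids illustrative):

// Illustrative: with GetCPUList(0) == {0,1,2,3,4,5,6,7} and thread_num == 4,
// all_model_bind_list becomes {{0,1,2,3}, {4,5,6,7}}: two workers, each
// pinned to four CPUs of NUMA node 0.
std::vector<std::vector<int>> all_model_bind_list;
SetNumaBindStrategy(&all_model_bind_list, /*thread_num=*/4, /*node_id=*/0);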

View File

@ -16,6 +16,7 @@
#include "src/cxx_api/model_pool/model_worker.h"
#include "src/common/log.h"
#include "src/common/utils.h"
#include "src/common/common.h"
namespace mindspore {
void ModelThread::Run() {
while (!PredictTaskQueue::GetInstance()->IsPredictTaskDone()) {
@ -59,9 +60,12 @@ void ModelThread::Run() {
}
Status ModelThread::Init(const char *model_buf, size_t size, const std::shared_ptr<Context> &model_context,
const Key &dec_key, const std::string &dec_mode) {
const Key &dec_key, const std::string &dec_mode, int node_id) {
model_ = std::make_shared<Model>();
mindspore::ModelType model_type = kMindIR;
if (node_id > -1) {
model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
}
auto status = model_->Build(model_buf, size, model_type, model_context, dec_key, dec_mode);
if (status != kSuccess) {
MS_LOG(ERROR) << "model build failed in ModelPool Init";

View File

@ -36,7 +36,7 @@ class ModelThread {
// the model pool is initialized once and can always accept model run requests
Status Init(const char *model_buf, size_t size, const std::shared_ptr<Context> &model_context,
const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm);
const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm, int node_id = -1);
std::vector<MSTensor> GetInputs();

View File

@ -15,6 +15,7 @@
*/
#include "src/inner_context.h"
#include <algorithm>
#include <memory>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "src/common/log_util.h"
@ -138,7 +139,11 @@ int InnerContext::Init() {
}
if (this->allocator == nullptr) {
#ifdef SERVER_INFERENCE
this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
#else
this->allocator = mindspore::Allocator::Create();
#endif
CHECK_NULL_RETURN(this->allocator);
}
if (IsNpuEnabled()) {

View File

@ -20,7 +20,11 @@
#include <string>
#include <unordered_map>
#include "include/context.h"
#ifdef SERVER_INFERENCE
#include "src/runtime/dynamic_mem_allocator.h"
#else
#include "src/runtime/inner_allocator.h"
#endif
#include "thread/threadpool.h"
#include "nnacl/op_base.h"
#ifdef ENABLE_ARM
@ -82,6 +86,13 @@ struct InnerContext : public Context {
void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);
#ifdef SERVER_INFERENCE
/// \brief Set NUMA node id.
///
/// \param[in] node Define the NUMA node id.
inline void SetNodeId(int node_id) { node_id_ = node_id; }
#endif
private:
bool IsAllDeviceTypeValid() const;
@ -99,6 +110,10 @@ struct InnerContext : public Context {
bool device_and_pkg_support_fp16_ = false;
#ifdef SERVER_INFERENCE
int node_id_ = -1;
#endif
ThreadPool *thread_pool_{nullptr};
// key is the precursor tensor's pointer, value is the group of successors' pointer.

View File

@ -40,6 +40,9 @@
#include "src/lite_model.h"
#include "src/weight_decoder.h"
#include "src/runtime/runtime_allocator.h"
#ifdef SERVER_INFERENCE
#include "src/runtime/dynamic_mem_allocator.h"
#endif
#include "src/lite_kernel_util.h"
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
#include "src/registry/register_kernel_impl.h"
@ -1078,6 +1081,7 @@ int LiteSession::Init(InnerContext *context) {
is_running_.store(false);
return ret;
}
is_running_.store(false);
return RET_OK;
}

View File

@ -13,18 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef MSLITE_ENABLE_SERVER_INFERENCE
#include "src/runtime/dynamic_mem_allocator.h"
#else
#include "src/runtime/inner_allocator.h"
#endif
namespace mindspore {
std::shared_ptr<Allocator> Allocator::Create() {
#ifdef MSLITE_ENABLE_SERVER_INFERENCE
return std::make_shared<DynamicMemAllocator>();
#else
return std::make_shared<DefaultAllocator>();
#endif
}
std::shared_ptr<Allocator> Allocator::Create() { return std::make_shared<DefaultAllocator>(); }
} // namespace mindspore

View File

@ -15,40 +15,43 @@
*/
#include "src/runtime/dynamic_mem_allocator.h"
#include <memory>
#include "src/runtime/dynamic_mem_manager.h"
#include "src/common/utils.h"
#include "src/common/log_adapter.h"
namespace mindspore {
void *DynamicMemAllocator::Malloc(size_t size) { return DynamicMemManager::GetInstance()->Malloc(size); }
void *DynamicMemAllocator::Malloc(size_t size) { return mem_oper_->Malloc(size); }
void DynamicMemAllocator::Free(void *ptr) { DynamicMemManager::GetInstance()->Free(ptr); }
void DynamicMemAllocator::Free(void *ptr) { mem_oper_->Free(ptr); }
int DynamicMemAllocator::RefCount(void *ptr) {
if (ptr == nullptr) {
return -1;
}
return DynamicMemManager::GetInstance()->RefCount(ptr);
return mem_oper_->RefCount(ptr);
}
int DynamicMemAllocator::SetRefCount(void *ptr, int ref_count) {
if (ptr == nullptr) {
return -1;
}
return DynamicMemManager::GetInstance()->SetRefCount(ptr, ref_count);
return mem_oper_->SetRefCount(ptr, ref_count);
}
int DynamicMemAllocator::IncRefCount(void *ptr, int ref_count) {
if (ptr == nullptr) {
return -1;
}
return DynamicMemManager::GetInstance()->IncRefCount(ptr, ref_count);
return mem_oper_->IncRefCount(ptr, ref_count);
}
int DynamicMemAllocator::DecRefCount(void *ptr, int ref_count) {
if (ptr == nullptr) {
return -1;
}
return DynamicMemManager::GetInstance()->DecRefCount(ptr, ref_count);
return mem_oper_->DecRefCount(ptr, ref_count);
}
DynamicMemAllocator::DynamicMemAllocator(int node_id) {
mem_oper_ = DynamicMemManager::GetInstance()->GetMemOperator(node_id);
}
} // namespace mindspore
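
A hedged usage sketch of the reworked allocator (node id illustrative; InnerContext::Init wires this up with its node_id_ earlier in this diff):

// Sketch: the allocator resolves its node's MemOperator once at construction
// instead of calling through a global singleton pool on every operation.
auto allocator = std::make_shared<DynamicMemAllocator>(/*node_id=*/0);
void *buf = allocator->Malloc(1024);  // served from node 0's memory pool
allocator->Free(buf);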

View File

@ -19,13 +19,15 @@
#include <mutex>
#include <map>
#include <memory>
#include <unordered_map>
#include "include/api/allocator.h"
#include "src/runtime/dynamic_mem_manager.h"
namespace mindspore {
class DynamicMemAllocator : public Allocator {
public:
DynamicMemAllocator() = default;
explicit DynamicMemAllocator(int node_id);
virtual ~DynamicMemAllocator() = default;
void *Malloc(size_t size) override;
void Free(void *ptr) override;
@ -33,6 +35,9 @@ class DynamicMemAllocator : public Allocator {
int SetRefCount(void *ptr, int ref_count) override;
int IncRefCount(void *ptr, int ref_count) override;
int DecRefCount(void *ptr, int ref_count) override;
private:
std::shared_ptr<MemOperator> mem_oper_;
};
} // namespace mindspore

View File

@ -17,6 +17,12 @@
#include "src/runtime/dynamic_mem_manager.h"
#include "src/common/log_adapter.h"
#include "src/common/utils.h"
#include "src/common/common.h"
#include "src/runtime/numa_adapter.h"
using mindspore::numa::NUMAAdapter;
using mindspore::numa::MemoryInfo;
namespace mindspore {
namespace {
@ -24,51 +30,69 @@ namespace {
static constexpr size_t kMemAlginSize = 64;
// The minimum unit size (512M) of memory block used for dynamic extend.
static constexpr size_t kAllocUnitSize = 536870912;
static constexpr auto kAllocUnitSize = 536870912;
static constexpr size_t kBlockSize = 1024;
static constexpr auto kBlockSize = 2048;
// invalid block index
static constexpr int64_t kInvalidIndex = -1;
static constexpr int kInvalidIndex = -1;
// invalid numa node id
static constexpr int kInvalidNodeId = -1;
static constexpr int kInvalidRefCount = -1;
static constexpr float kDefaultMemoryLeastRatio = 0.1;
size_t Rounded(size_t size) { return (size + kMemAlginSize - 1) & (~(kMemAlginSize - 1)); }
} // namespace
void *Allocate(size_t allocate_size) {
if (allocate_size > lite::GetMaxMallocSize()) {
MS_LOG(ERROR) << "MallocData out of max_size, size: " << allocate_size;
void *MemOperator::Allocate(size_t rounded_size, int node_id, size_t *allocate_size) {
int64_t allocate_tmp_size = static_cast<int64_t>(rounded_size < kAllocUnitSize ? kAllocUnitSize : rounded_size);
int64_t free_count = 0;
int64_t left = 0;
if (node_id >= 0) {
// allocate memory from numa node
MemoryInfo mem_info = NUMAAdapter::GetInstance()->GetNodeSize(node_id);
free_count = mem_info.free;
} else {
free_count = lite::GetFreeMemory();
}
if (UNLIKELY(static_cast<int64_t>(rounded_size) >= free_count)) {
MS_LOG(ERROR) << "No enough memory left!node_id: " << node_id << ", request: " << rounded_size
<< ", free: " << free_count << ", least free request: " << least_free_memory_;
return nullptr;
}
if (free_count < allocate_tmp_size) {
allocate_tmp_size = rounded_size;
}
left = free_count - allocate_tmp_size;
if (left <= least_free_memory_) {
MS_LOG(ERROR) << "No enough memory left!node_id: " << node_id << ", request: " << rounded_size
<< ", free: " << free_count << ", least free request: " << least_free_memory_;
return nullptr;
}
*allocate_size = allocate_tmp_size;
void *data = nullptr;
#ifdef _WIN32
data = _aligned_malloc(allocate_size, kMemAlginSize);
data = _aligned_malloc(allocate_tmp_size, kMemAlginSize);
#else
auto ret = posix_memalign(&data, kMemAlginSize, allocate_size);
if (UNLIKELY(ret != 0)) {
MS_LOG(ERROR) << "posix_memalign failed!ret: " << ret;
return nullptr;
if (node_id >= 0) {
data = NUMAAdapter::GetInstance()->Malloc(node_id, static_cast<size_t>(allocate_tmp_size));
} else {
auto ret = posix_memalign(&data, kMemAlginSize, static_cast<size_t>(allocate_tmp_size));
if (UNLIKELY(ret != 0)) {
MS_LOG(ERROR) << "posix_memalign failed!ret: " << ret;
return nullptr;
}
}
#endif
if (UNLIKELY(data == nullptr)) {
MS_LOG(ERROR) << "malloc data failed!";
return nullptr;
}
return data;
}
} // namespace
DynamicMemManager::DynamicMemManager() {
blocks_.resize(kBlockSize);
garbage_block_ = kInvalidIndex;
auto *block = GetBlock();
block->data_ = Allocate(kAllocUnitSize);
if (UNLIKELY(block->data_ == nullptr)) {
return;
}
all_datas_.emplace_back(block->data_);
block->size_ = kAllocUnitSize;
free_blocks_.emplace(kAllocUnitSize, block->index_);
}
Block *DynamicMemManager::GetBlock() {
Block *MemOperator::GetBlock() {
Block *block;
if (garbage_block_ != kInvalidIndex) {
block = &blocks_[garbage_block_];
@ -87,24 +111,25 @@ Block *DynamicMemManager::GetBlock() {
return block;
}
void DynamicMemManager::AddGarbageBlock(const int64_t index) {
void MemOperator::AddGarbageBlock(const int64_t index) {
blocks_[index].next_index_ = garbage_block_;
garbage_block_ = index;
}
// malloc memory for data storage
void *DynamicMemManager::Malloc(size_t size) {
void *MemOperator::Malloc(size_t size) {
auto rounded_size = Rounded(size);
std::lock_guard<std::mutex> locker(mutex_);
auto iter = free_blocks_.lower_bound(rounded_size);
if (iter != free_blocks_.end()) {
auto index = iter->second;
free_blocks_.erase(iter);
auto *block = &blocks_[index];
block->used_ = true;
datas_.emplace(block->data_, index);
if (block->size_ > rounded_size) {
blocks_[index].used_ = true;
auto data = blocks_[index].data_;
datas_.emplace(data, index);
if (blocks_[index].size_ > rounded_size) {
Block *block_next = GetBlock();
auto *block = &blocks_[index];
block_next->size_ = block->size_ - rounded_size;
block->size_ = rounded_size;
block_next->data_ = static_cast<int8_t *>(block->data_) + rounded_size;
@ -117,15 +142,15 @@ void *DynamicMemManager::Malloc(size_t size) {
block->next_index_ = block_next->index_;
free_blocks_.emplace(block_next->size_, block_next->index_);
}
return block->data_;
return data;
}
// todo kAllocUnitSize can be replaced by config
auto allocate_size = rounded_size < kAllocUnitSize ? kAllocUnitSize : rounded_size;
void *data = Allocate(allocate_size);
size_t allocate_size;
void *data = Allocate(rounded_size, node_id_, &allocate_size);
if (UNLIKELY(data == nullptr)) {
return nullptr;
}
all_datas_.emplace_back(data);
all_datas_.emplace(data, allocate_size);
Block *block = GetBlock();
block->size_ = rounded_size;
block->data_ = data;
@ -143,7 +168,7 @@ void *DynamicMemManager::Malloc(size_t size) {
}
// return memory to the memory pool
void DynamicMemManager::Free(void *ptr) {
void MemOperator::Free(void *ptr) {
if (UNLIKELY(ptr == nullptr)) {
return;
}
@ -194,7 +219,7 @@ void DynamicMemManager::Free(void *ptr) {
}
}
void DynamicMemManager::EraseFreeBlock(const int64_t index) {
void MemOperator::EraseFreeBlock(const int64_t index) {
auto range = free_blocks_.equal_range(blocks_[index].size_);
for (auto item = range.first; item != range.second; ++item) {
if (item->second == index) {
@ -204,23 +229,52 @@ void DynamicMemManager::EraseFreeBlock(const int64_t index) {
}
}
DynamicMemManager::~DynamicMemManager() {
MS_LOG(DEBUG) << "~DynamicMemManager() begin.";
MemOperator::MemOperator(int node_id) {
if (node_id >= 0 && NUMAAdapter::GetInstance()->Available()) {
node_id_ = node_id;
auto mem_info = NUMAAdapter::GetInstance()->GetNodeSize(node_id_);
if (mem_info.total <= 0) {
return;
}
least_free_memory_ = mem_info.total * kDefaultMemoryLeastRatio;
} else {
auto total = lite::GetMaxMallocSize();
least_free_memory_ = total * kDefaultMemoryLeastRatio;
}
blocks_.resize(kBlockSize);
garbage_block_ = kInvalidIndex;
auto *block = GetBlock();
size_t allocate_size;
block->data_ = Allocate(kAllocUnitSize, node_id, &allocate_size);
if (UNLIKELY(block->data_ == nullptr)) {
return;
}
all_datas_.emplace(block->data_, allocate_size);
block->size_ = allocate_size;
free_blocks_.emplace(allocate_size, block->index_);
}
MemOperator::~MemOperator() {
MS_LOG(DEBUG) << "~MemOperator() begin.";
for (auto &&data : all_datas_) {
#ifdef _WIN32
_aligned_free(data);
_aligned_free(data.first);
#else
free(data);
if (node_id_ >= 0) {
NUMAAdapter::GetInstance()->Free(data.first, data.second);
} else {
free(data.first);
}
#endif
data = nullptr;
}
free_blocks_.clear();
all_datas_.clear();
blocks_.clear();
MS_LOG(DEBUG) << "~DynamicMemManager() end.";
MS_LOG(DEBUG) << "~MemOperator() end.";
}
int DynamicMemManager::SetRefCount(void *ptr, int ref_count) {
int MemOperator::SetRefCount(void *ptr, int ref_count) {
std::lock_guard<std::mutex> locker(mutex_);
auto iter = datas_.find(ptr);
if (iter != datas_.end()) {
@ -228,10 +282,10 @@ int DynamicMemManager::SetRefCount(void *ptr, int ref_count) {
blocks_[index].ref_count_ = ref_count;
return ref_count;
}
return -1;
return kInvalidRefCount;
}
int DynamicMemManager::IncRefCount(void *ptr, int ref_count) {
int MemOperator::IncRefCount(void *ptr, int ref_count) {
std::lock_guard<std::mutex> locker(mutex_);
auto iter = datas_.find(ptr);
if (iter != datas_.end()) {
@ -239,10 +293,10 @@ int DynamicMemManager::IncRefCount(void *ptr, int ref_count) {
blocks_[index].ref_count_ += ref_count;
return blocks_[index].ref_count_;
}
return -1;
return kInvalidRefCount;
}
int DynamicMemManager::DecRefCount(void *ptr, int ref_count) {
int MemOperator::DecRefCount(void *ptr, int ref_count) {
std::lock_guard<std::mutex> locker(mutex_);
auto iter = datas_.find(ptr);
if (iter != datas_.end()) {
@ -250,15 +304,39 @@ int DynamicMemManager::DecRefCount(void *ptr, int ref_count) {
blocks_[index].ref_count_ -= ref_count;
return blocks_[index].ref_count_;
}
return -1;
return kInvalidRefCount;
}
int DynamicMemManager::RefCount(void *ptr) {
int MemOperator::RefCount(void *ptr) {
std::lock_guard<std::mutex> locker(mutex_);
auto iter = datas_.find(ptr);
if (iter != datas_.end()) {
return blocks_[iter->second].ref_count_;
}
return -1;
return kInvalidRefCount;
}
std::shared_ptr<MemOperator> DynamicMemManager::GetMemOperator(const int node_id) {
std::map<int, std::shared_ptr<MemOperator>>::iterator iter;
int numa_node_id = node_id;
if (numa_node_id < 0) {
numa_node_id = kInvalidNodeId;
}
std::lock_guard<std::mutex> locker(mutex_);
std::shared_ptr<MemOperator> mem_oper = nullptr;
iter = nodes_mem_.find(numa_node_id);
if (iter == nodes_mem_.end()) {
mem_oper = std::make_shared<MemOperator>(numa_node_id);
if (UNLIKELY(mem_oper == nullptr)) {
MS_LOG(ERROR) << "make_shared MemOperator failed!";
return nullptr;
}
MS_LOG(DEBUG) << "new mem_oper, node_id " << numa_node_id;
nodes_mem_.insert({numa_node_id, mem_oper});
} else {
mem_oper = iter->second;
}
return mem_oper;
}
} // namespace mindspore
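
Putting the manager changes together: DynamicMemManager shrinks to a registry that caches one MemOperator per NUMA node, and each operator keeps a best-fit free list (free_blocks_) keyed by rounded block size. A hedged sketch (sizes illustrative):

// Sketch: node ids < 0 collapse to kInvalidNodeId and share one plain
// (non-NUMA) operator; every valid node id gets its own cached pool.
auto oper = DynamicMemManager::GetInstance()->GetMemOperator(/*node_id=*/0);
void *p = oper->Malloc(100);  // Rounded(100) == 128 with kMemAlginSize == 64
oper->SetRefCount(p, 1);
oper->Free(p);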

View File

@ -24,8 +24,6 @@
#include <unordered_map>
#include <deque>
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
namespace mindspore {
struct Block {
// used_ may be true when ref_count_ == 0
@ -38,14 +36,10 @@ struct Block {
int64_t next_index_ = -1;
};
class DynamicMemManager {
class MemOperator {
public:
static DynamicMemManager *GetInstance() {
static DynamicMemManager instance;
return &instance;
}
virtual ~DynamicMemManager();
explicit MemOperator(int node_id);
virtual ~MemOperator();
void *Malloc(size_t size);
void Free(void *ptr);
@ -53,14 +47,18 @@ class DynamicMemManager {
int IncRefCount(void *ptr, int ref_count);
int DecRefCount(void *ptr, int ref_count);
int RefCount(void *ptr);
inline void set_node_id(int node_id) { node_id_ = node_id; }
inline int node_id(void) const { return node_id_; }
private:
DynamicMemManager();
Block *GetBlock();
void EraseFreeBlock(const int64_t index);
void AddGarbageBlock(const int64_t index);
void *Allocate(size_t rounded_size, int node_id, size_t *allocate_size);
private:
int node_id_ = -1;
int64_t least_free_memory_ = 0;
// all data blocks
size_t block_count_ = 0;
int64_t garbage_block_;
@ -70,7 +68,21 @@ class DynamicMemManager {
std::multimap<size_t, int64_t> free_blocks_;
// key: data addr, value: Block index
std::unordered_map<void *, int64_t> datas_;
std::vector<void *> all_datas_;
std::unordered_map<void *, size_t> all_datas_;
};
class DynamicMemManager {
public:
static DynamicMemManager *GetInstance() {
static DynamicMemManager instance;
return &instance;
}
std::shared_ptr<MemOperator> GetMemOperator(const int node_id);
private:
std::map<int, std::shared_ptr<MemOperator>> nodes_mem_;
std::mutex mutex_;
};
} // namespace mindspore

View File

@ -0,0 +1,224 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/numa_adapter.h"
#include <dlfcn.h>
#include "src/common/log_adapter.h"
#include "src/common/common.h"
namespace mindspore {
namespace numa {
namespace {
static constexpr int kSuccess = 0;
static constexpr int kBitsPerByte = 8;
} // namespace
NUMAAdapter::NUMAAdapter() {
available_ = false;
handle_ = dlopen("libnuma.so.1.0.0", RTLD_LAZY | RTLD_LOCAL);
if (handle_ == nullptr) {
MS_LOG(WARNING) << "dlopen libnuma failed, NUMA is not supported.";
return;
}
numa_interfaces_.numa_available = reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_available"));
if (UNLIKELY(numa_interfaces_.numa_available == nullptr)) {
MS_LOG(ERROR) << "numa_available not found!";
(void)dlclose(handle_);
handle_ = nullptr;
return;
}
if (numa_interfaces_.numa_available() < 0) {
MS_LOG(ERROR) << "numa is not available!";
(void)dlclose(handle_);
handle_ = nullptr;
return;
}
available_ = true;
numa_interfaces_.numa_num_configured_nodes =
reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_num_configured_nodes"));
if (UNLIKELY(numa_interfaces_.numa_num_configured_nodes == nullptr)) {
MS_LOG(ERROR) << "numa_num_configured_nodes not found!";
available_ = false;
}
numa_interfaces_.numa_num_task_cpus = reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_num_task_cpus"));
if (UNLIKELY(numa_interfaces_.numa_num_task_cpus == nullptr)) {
MS_LOG(ERROR) << "numa_num_task_cpus not found!";
available_ = false;
}
numa_interfaces_.numa_node_to_cpus =
reinterpret_cast<int (*)(int node, struct bitmask *mask)>(dlsym(handle_, "numa_node_to_cpus"));
if (UNLIKELY(numa_interfaces_.numa_node_to_cpus == nullptr)) {
MS_LOG(ERROR) << "numa_node_to_cpus not found!";
available_ = false;
}
numa_interfaces_.numa_allocate_nodemask =
reinterpret_cast<struct bitmask *(*)(void)>(dlsym(handle_, "numa_allocate_nodemask"));
if (UNLIKELY(numa_interfaces_.numa_allocate_nodemask == nullptr)) {
MS_LOG(ERROR) << "numa_allocate_nodemask not found!";
available_ = false;
}
numa_interfaces_.numa_bitmask_clearall =
reinterpret_cast<struct bitmask *(*)(struct bitmask *)>(dlsym(handle_, "numa_bitmask_clearall"));
if (UNLIKELY(numa_interfaces_.numa_bitmask_clearall == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_clearall not found!";
available_ = false;
}
numa_interfaces_.numa_bitmask_setbit =
reinterpret_cast<struct bitmask *(*)(struct bitmask *, unsigned int)>(dlsym(handle_, "numa_bitmask_setbit"));
if (UNLIKELY(numa_interfaces_.numa_bitmask_setbit == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_setbit not found!";
available_ = false;
}
numa_interfaces_.numa_bind = reinterpret_cast<void (*)(struct bitmask *)>(dlsym(handle_, "numa_bind"));
if (UNLIKELY(numa_interfaces_.numa_bind == nullptr)) {
MS_LOG(ERROR) << "numa_bind not found!";
available_ = false;
}
numa_interfaces_.numa_bitmask_free =
reinterpret_cast<void (*)(struct bitmask *)>(dlsym(handle_, "numa_bitmask_free"));
if (UNLIKELY(numa_interfaces_.numa_bitmask_free == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_free not found!";
available_ = false;
}
numa_interfaces_.numa_alloc_onnode =
reinterpret_cast<void *(*)(size_t size, int node)>(dlsym(handle_, "numa_alloc_onnode"));
if (UNLIKELY(numa_interfaces_.numa_alloc_onnode == nullptr)) {
MS_LOG(ERROR) << "numa_bitmask_free not found!";
available_ = false;
}
numa_interfaces_.numa_node_size64 =
reinterpret_cast<int64_t (*)(int node, int64_t *freep)>(dlsym(handle_, "numa_node_size64"));
if (UNLIKELY(numa_interfaces_.numa_node_size64 == nullptr)) {
MS_LOG(ERROR) << "numa_node_size64 not found!";
available_ = false;
}
numa_interfaces_.numa_free = reinterpret_cast<void (*)(void *start, size_t size)>(dlsym(handle_, "numa_free"));
if (UNLIKELY(numa_interfaces_.numa_free == nullptr)) {
MS_LOG(ERROR) << "numa_free not found!";
available_ = false;
}
if (!available_) {
(void)dlclose(handle_);
handle_ = nullptr;
return;
}
}
void NUMAAdapter::Bind(int node_id) {
if (!Available() || node_id < 0) {
return;
}
auto bitmask = numa_interfaces_.numa_allocate_nodemask();
if (UNLIKELY(bitmask == nullptr)) {
MS_LOG(ERROR) << "bind numa_node " << node_id << " failed!";
return;
}
(void)numa_interfaces_.numa_bitmask_setbit(bitmask, node_id);
numa_interfaces_.numa_bind(bitmask);
numa_interfaces_.numa_bitmask_free(bitmask);
}
void *NUMAAdapter::Malloc(int node_id, size_t size) {
if (!Available() || node_id < 0) {
return nullptr;
}
return numa_interfaces_.numa_alloc_onnode(size, node_id);
}
void NUMAAdapter::Free(void *data, size_t size) {
if (!Available() || data == nullptr) {
return;
}
numa_interfaces_.numa_free(data, size);
}
int NUMAAdapter::NodesNum() {
if (!Available()) {
return 0;
}
return numa_interfaces_.numa_num_configured_nodes();
}
int NUMAAdapter::CPUNum() {
if (!Available()) {
return 0;
}
return numa_interfaces_.numa_num_task_cpus();
}
std::vector<int> NUMAAdapter::GetCPUList(int node_id) {
std::vector<int> cpu_list;
if (!Available() || node_id < 0) {
return cpu_list;
}
struct bitmask *nodemask = numa_interfaces_.numa_allocate_nodemask();
if (nodemask == nullptr) {
MS_LOG(ERROR) << "allocate nodemask failed!";
return cpu_list;
}
auto ret = numa_interfaces_.numa_node_to_cpus(node_id, nodemask);
if (ret != kSuccess || nodemask->maskp == nullptr) {
MS_LOG(ERROR) << "numa_node_to_cpus failed! ret = " << ret;
numa_interfaces_.numa_bitmask_free(nodemask);
return cpu_list;
}
int cpu_num = numa_interfaces_.numa_num_task_cpus();
if (UNLIKELY(cpu_num < 0)) {
MS_LOG(ERROR) << "numa_num_task_cpus return " << cpu_num;
numa_interfaces_.numa_bitmask_free(nodemask);
return cpu_list;
}
int index = 0;
int maskp_index = 0;
auto maskp = nodemask->maskp;
do {
if (UNLIKELY(maskp == nullptr)) {
MS_LOG(ERROR) << "maskp is nullptr!";
break;
}
auto mask = *(maskp);
static constexpr auto kBitsPerMask = static_cast<int>(sizeof(decltype(mask)) * kBitsPerByte);
int step = static_cast<int>(maskp_index * kBitsPerMask);
for (int i = 0; i < kBitsPerMask; ++i) {
if (mask & 1) {
cpu_list.emplace_back(i + step);
}
mask >>= 1;
}
index += kBitsPerMask;
if (index >= cpu_num) {
break;
}
++maskp;  // advance to the next mask word
++maskp_index;
} while (true);
numa_interfaces_.numa_bitmask_free(nodemask);
return cpu_list;
}
MemoryInfo NUMAAdapter::GetNodeSize(int node_id) {
MemoryInfo mem_info;
if (!Available() || node_id < 0) {
return mem_info;
}
mem_info.total = numa_interfaces_.numa_node_size64(node_id, &mem_info.free);
return mem_info;
}
NUMAAdapter::~NUMAAdapter() {
if (handle_ == nullptr) {
return;
}
(void)dlclose(handle_);
}
} // namespace numa
} // namespace mindspore
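
The loop in GetCPUList above decodes the node mask one machine word at a time. A hypothetical standalone helper (not part of this diff) showing the same bit-to-CPU mapping:

// Bit i of mask word w maps to CPU w * bits_per_word + i; for example, a
// first word of 0b1011 decodes to CPUs {0, 1, 3}.
std::vector<int> DecodeMaskWord(unsigned long word, int base_cpu) {
  std::vector<int> cpus;
  for (int i = 0; word != 0; ++i, word >>= 1) {
    if (word & 1UL) {
      cpus.push_back(base_cpu + i);
    }
  }
  return cpus;
}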

View File

@ -0,0 +1,71 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
#include <numa.h>
#include <cstddef>
#include <vector>
namespace mindspore {
namespace numa {
struct NUMAInterface {
int (*numa_available)(void);
int (*numa_num_configured_nodes)(void);
int (*numa_num_task_cpus)();
int (*numa_node_to_cpus)(int node, struct bitmask *mask);
struct bitmask *(*numa_allocate_nodemask)(void);
struct bitmask *(*numa_bitmask_clearall)(struct bitmask *);
struct bitmask *(*numa_bitmask_setbit)(struct bitmask *, unsigned int);
void (*numa_bind)(struct bitmask *);
void (*numa_bitmask_free)(struct bitmask *);
void *(*numa_alloc_onnode)(size_t size, int node);
int64_t (*numa_node_size64)(int node, int64_t *freep);
void (*numa_free)(void *start, size_t size);
};
struct MemoryInfo {
int64_t total = 0;
int64_t free = 0;
};
class NUMAAdapter {
public:
static NUMAAdapter *GetInstance() {
static NUMAAdapter instance;
return &instance;
}
virtual ~NUMAAdapter();
inline bool Available() const { return available_; }
void Bind(int node_id);
void *Malloc(int node_id, size_t size);
void Free(void *data, size_t size);
int NodesNum();
int CPUNum();
std::vector<int> GetCPUList(int node_id);
MemoryInfo GetNodeSize(int node_id);
private:
NUMAAdapter();
void *handle_ = nullptr;  // libnuma dlopen handle
bool available_ = false;
NUMAInterface numa_interfaces_;
};
} // namespace numa
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
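
End to end, the adapter dlopens libnuma at runtime so builds and machines without it still work (Available() then reports false). A hedged usage sketch of the public surface (node id and size illustrative):

// Sketch: typical call sequence against the singleton adapter.
auto *numa = mindspore::numa::NUMAAdapter::GetInstance();
if (numa->Available()) {
  numa->Bind(0);                       // bind the current task to node 0
  void *p = numa->Malloc(0, 1 << 20);  // 1 MiB allocated on node 0
  if (p != nullptr) {
    numa->Free(p, 1 << 20);
  }
}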

View File

@ -131,6 +131,7 @@ set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/runtime/dynamic_mem_allocator.cc
${SRC_DIR}/runtime/dynamic_mem_manager.cc
${SRC_DIR}/runtime/numa_adapter.cc
)
endif()