forked from mindspore-Ecosystem/mindspore
!30550 [MS][LITE] Support NUMA
Merge pull request !30550 from chenjianping/master_dev
Commit: 87b47c76a9
@@ -136,6 +136,7 @@ set(LITE_SRC
         ${LITE_SRC}
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
         )
 endif()

@@ -21,6 +21,8 @@
 namespace mindspore {
 namespace lite {
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+
 enum NCHW_SHAPE { NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
 enum NHWC_SHAPE { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 };
 enum HWCK_SHAPE { HWCK_H = 0, HWCK_W = 1, HWCK_C = 2, HWCK_K = 3 };

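The UNLIKELY macro lands in src/common/common.h here (and is removed from dynamic_mem_manager.h further down), so the whole SERVER_INFERENCE path shares one definition. A minimal standalone sketch of what the hint does — __builtin_expect tells GCC/Clang the condition is almost always false, keeping the happy path on the fall-through branch; the caller below is hypothetical, for illustration only:

#include <cstdio>

#define UNLIKELY(x) __builtin_expect(!!(x), 0)

// Hypothetical caller, only to show the macro guarding a cold error branch.
int CheckedDouble(const int *v, int *out) {
  if (UNLIKELY(v == nullptr)) {
    return -1;  // cold path: branch predicted not taken
  }
  *out = *v * 2;  // hot path stays on the straight-line code
  return 0;
}

int main() {
  int in = 21, out = 0;
  if (CheckedDouble(&in, &out) == 0) {
    std::printf("%d -> %d\n", in, out);
  }
  return 0;
}
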
@@ -57,6 +59,11 @@ static const char *const kMSCacheModelPath = "cache_model_path";
 static const char *const kMSCacheVocabSize = "vocab_size";
 static const char *const kMSCacheDeviceSize = "device_cache_size";
 static const char *const kMSCacheSerializePath = "serialize_path";
+// config
+#ifdef SERVER_INFERENCE
+static const char *const kConfigServerInference = "server_inference";
+static const char *const kConfigNUMANodeId = "numa_node_id";
+#endif
 }  // namespace lite
 }  // namespace mindspore

@@ -27,6 +27,9 @@
 #include <sys/types.h>
 #include <sys/param.h>
 #endif
+#ifdef SERVER_INFERENCE
+#include <sys/sysinfo.h>
+#endif

 namespace mindspore {
 namespace lite {

@@ -178,5 +181,17 @@ size_t GetMaxMallocSize() {
 #endif
   return max_malloc_size;
 }
+
+#ifdef SERVER_INFERENCE
+int64_t GetFreeMemory() {
+  struct sysinfo info;
+  auto ret = sysinfo(&info);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "sysinfo failed!ret = " << ret;
+    return 0;
+  }
+  return static_cast<int64_t>(info.freeram);
+}
+#endif
 }  // namespace lite
 }  // namespace mindspore

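GetFreeMemory() returns info.freeram as-is. One hedged caveat for reviewers: sysinfo(2) reports its sizes as multiples of info.mem_unit bytes, so on kernels where mem_unit != 1 the raw freeram is not a byte count. A byte-accurate variant would look like the sketch below (a suggestion, not part of this patch):

#include <sys/sysinfo.h>
#include <cstdint>

// Sketch only: scale freeram by mem_unit to get bytes on every kernel.
int64_t GetFreeMemoryBytes() {
  struct sysinfo info;
  if (sysinfo(&info) != 0) {
    return 0;
  }
  return static_cast<int64_t>(info.freeram) * static_cast<int64_t>(info.mem_unit);
}
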
@@ -43,6 +43,9 @@ uint64_t GetTimeUs();
 bool IsSupportSDot();

 size_t GetMaxMallocSize();
+#ifdef SERVER_INFERENCE
+int64_t GetFreeMemory();
+#endif

 #ifdef __ANDROID__
 uint32_t getHwCap(int hwcap_type);

@@ -34,6 +34,9 @@
 #include "src/lite_session.h"
 #include "src/common/file_utils.h"
 #include "src/common/config_file.h"
+#ifdef SERVER_INFERENCE
+#include "src/common/common.h"
+#endif

 namespace mindspore {
 namespace {

@@ -684,7 +687,15 @@ lite::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) {
     delete context;
     return nullptr;
   }
+#ifdef SERVER_INFERENCE
+  auto iter = config_info_.find(lite::kConfigServerInference);
+  if (iter != config_info_.end()) {
+    auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);
+    if (numa_iter != iter->second.end()) {
+      context->SetNodeId(std::atoi(numa_iter->second.c_str()));
+    }
+  }
+#endif
   session->InitExecutionConfig(&execution_plan_);
   session->SetConfigInfo(&config_info_);

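The lookup above walks config_info_ as section -> key -> value, so the NUMA node is expected under a server_inference section with a numa_node_id key. A tiny standalone illustration of the shape being parsed (the literal values are hypothetical):

#include <cstdlib>
#include <map>
#include <string>

int main() {
  // Hypothetical config_info_ contents after loading a config:
  // section "server_inference" carrying key "numa_node_id".
  std::map<std::string, std::map<std::string, std::string>> config_info = {
      {"server_inference", {{"numa_node_id", "0"}}},
  };
  return std::atoi(config_info["server_inference"]["numa_node_id"].c_str());
}
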
@@ -22,6 +22,9 @@
 #include "src/runtime/inner_allocator.h"
 #include "src/common//file_utils.h"
 #include "src/pack_weight_manager.h"
+#include "src/runtime/numa_adapter.h"
+#include "src/common/common.h"

 namespace mindspore {
 namespace {
 constexpr int32_t kNumThreads = 4;

@@ -36,6 +39,32 @@ int GetCoreNum() {
 #endif
   return core_num;
 }
+
+void SetNumaBindStrategy(std::vector<std::vector<int>> *all_model_bind_list, int thread_num, int node_id) {
+  if (UNLIKELY(thread_num == 0)) {
+    MS_LOG(ERROR) << "thread num is zero.";
+    return;
+  }
+  std::vector<int> cpu_list = numa::NUMAAdapter::GetInstance()->GetCPUList(node_id);
+  auto cpu_num = cpu_list.size();
+  if (cpu_num == 0) {
+    return;
+  }
+  std::vector<int> bind_id;
+  bind_id.reserve(thread_num);
+  all_model_bind_list->reserve(cpu_num / thread_num + 1);
+  bind_id.emplace_back(cpu_list[0]);
+  for (size_t i = 1; i < cpu_num; ++i) {
+    if (i % thread_num == 0) {
+      all_model_bind_list->emplace_back(bind_id);
+      bind_id.clear();
+    }
+    bind_id.emplace_back(cpu_list[i]);
+  }
+  if (!bind_id.empty()) {
+    all_model_bind_list->emplace_back(bind_id);
+  }
+}
 }  // namespace

 void ModelPool::SetBindStrategy(std::vector<std::vector<int>> *all_model_bind_list, int thread_num) {

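SetNumaBindStrategy slices one node's CPU list into consecutive groups of thread_num cores, one group per pooled model. A standalone sketch of the same grouping, with the NUMAAdapter lookup replaced by a fixed CPU list (the eight-CPU node is assumed for illustration):

#include <iostream>
#include <vector>

int main() {
  std::vector<int> cpu_list = {0, 1, 2, 3, 4, 5, 6, 7};  // assumed CPUs of node 0
  size_t thread_num = 2;
  std::vector<std::vector<int>> groups;
  std::vector<int> bind_id = {cpu_list[0]};
  for (size_t i = 1; i < cpu_list.size(); ++i) {
    if (i % thread_num == 0) {  // a full group of thread_num CPUs is ready
      groups.emplace_back(bind_id);
      bind_id.clear();
    }
    bind_id.emplace_back(cpu_list[i]);
  }
  if (!bind_id.empty()) {
    groups.emplace_back(bind_id);  // flush the trailing group
  }
  // groups is now {0,1}, {2,3}, {4,5}, {6,7}: one bind list per worker.
  std::cout << groups.size() << " bind lists\n";
  return 0;
}
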
@@ -112,14 +141,26 @@ ModelPoolContex ModelPool::CreateModelContext(const std::shared_ptr<RunnerConfig
     MS_LOG(ERROR) << "context is nullptr.";
     return {};
   }
-  if (model_context->GetThreadNum() == 0) {
-    MS_LOG(ERROR) << "thread num is zero.";
+  if (model_context->GetThreadNum() < 1) {
+    MS_LOG(ERROR) << "Invalid thread num " << model_context->GetThreadNum();
     return {};
   }
+  int node_id = -1;
+  if (numa::NUMAAdapter::GetInstance()->Available()) {
+    node_id = 0;
+    num_models_ =
+      numa::NUMAAdapter::GetInstance()->GetCPUList(node_id).size() / static_cast<int>(model_context->GetThreadNum());
+  } else {
     num_models_ = GetCoreNum() / static_cast<int>(model_context->GetThreadNum());
+  }
   ModelPoolContex model_pool_context;
   std::vector<std::vector<int>> all_model_bind_list;
   if (model_context->GetThreadAffinityMode() == lite::HIGHER_CPU) {
-    SetBindStrategy(&all_model_bind_list, static_cast<int>(model_context->GetThreadNum()));
+    if (numa::NUMAAdapter::GetInstance()->Available()) {
+      SetNumaBindStrategy(&all_model_bind_list, static_cast<int>(model_context->GetThreadNum()), node_id);
+    } else {
+      SetBindStrategy(&all_model_bind_list, static_cast<int>(model_context->GetThreadNum()));
+    }
   } else if (model_context->GetThreadAffinityMode() == lite::MID_CPU) {
     MS_LOG(ERROR) << "not support bind MID_CPU.";
     return {};

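Worked example of the sizing above: with NUMA available and 16 CPUs reported for node 0, a thread num of 4 gives num_models_ = 16 / 4 = 4 pooled workers bound to that node; without NUMA the same division runs over GetCoreNum() instead.
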
@@ -171,6 +212,7 @@ Status ModelPool::Init(const std::string &model_path, const std::shared_ptr<RunnerConfig
     MS_LOG(ERROR) << "CreateModelContext failed, context is empty.";
     return kLiteError;
   }
+
   size_t size = 0;
   graph_buf_ = lite::ReadFile(model_path.c_str(), &size);
   if (graph_buf_ == nullptr) {

@@ -178,10 +220,14 @@ Status ModelPool::Init(const std::string &model_path, const std::shared_ptr<RunnerConfig
     return kLiteError;
   }
   lite::PackWeightManager::GetInstance()->InitWeightManagerByBuf(graph_buf_);
+  int node_id = -1;
+  if (numa::NUMAAdapter::GetInstance()->Available()) {
+    node_id = 0;
+  }
   std::shared_ptr<ModelThread> model_thread = nullptr;
   for (size_t i = 0; i < num_models_; i++) {
     model_thread = std::make_shared<ModelThread>();
-    auto status = model_thread->Init(graph_buf_, size, model_pool_context[i], dec_key, dec_mode);
+    auto status = model_thread->Init(graph_buf_, size, model_pool_context[i], dec_key, dec_mode, node_id);
     if (status != kSuccess) {
       MS_LOG(ERROR) << " model thread init failed.";
       return kLiteError;

@@ -16,6 +16,7 @@
 #include "src/cxx_api/model_pool/model_worker.h"
 #include "src/common/log.h"
 #include "src/common/utils.h"
+#include "src/common/common.h"
 namespace mindspore {
 void ModelThread::Run() {
   while (!PredictTaskQueue::GetInstance()->IsPredictTaskDone()) {

@@ -59,9 +60,12 @@ void ModelThread::Run() {
 }

 Status ModelThread::Init(const char *model_buf, size_t size, const std::shared_ptr<Context> &model_context,
-                         const Key &dec_key, const std::string &dec_mode) {
+                         const Key &dec_key, const std::string &dec_mode, int node_id) {
   model_ = std::make_shared<Model>();
   mindspore::ModelType model_type = kMindIR;
+  if (node_id > -1) {
+    model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
+  }
   auto status = model_->Build(model_buf, size, model_type, model_context, dec_key, dec_mode);
   if (status != kSuccess) {
     MS_LOG(ERROR) << "model build failed in ModelPool Init";

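Read together with the ModelImpl::CreateLiteSession hunk earlier, this completes the round trip of the node id: ModelPool::Init picks node_id, ModelThread::Init records it in the model's config under server_inference / numa_node_id, CreateLiteSession parses it back with std::atoi and calls InnerContext::SetNodeId, and InnerContext::Init finally builds a DynamicMemAllocator bound to that node.
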
@@ -36,7 +36,7 @@ class ModelThread {

   // the model pool is initialized once and can always accept model run requests
   Status Init(const char *model_buf, size_t size, const std::shared_ptr<Context> &model_context,
-              const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm);
+              const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm, int node_id = -1);

   std::vector<MSTensor> GetInputs();

@@ -15,6 +15,7 @@
  */
 #include "src/inner_context.h"
 #include <algorithm>
+#include <memory>
 #include "include/errorcode.h"
 #include "src/common/log_adapter.h"
 #include "src/common/log_util.h"

@@ -138,7 +139,11 @@ int InnerContext::Init() {
   }

   if (this->allocator == nullptr) {
+#ifdef SERVER_INFERENCE
+    this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
+#else
     this->allocator = mindspore::Allocator::Create();
+#endif
     CHECK_NULL_RETURN(this->allocator);
   }
   if (IsNpuEnabled()) {

@@ -20,7 +20,11 @@
 #include <string>
 #include <unordered_map>
 #include "include/context.h"
+#ifdef SERVER_INFERENCE
+#include "src/runtime/dynamic_mem_allocator.h"
+#else
 #include "src/runtime/inner_allocator.h"
+#endif
 #include "thread/threadpool.h"
 #include "nnacl/op_base.h"
 #ifdef ENABLE_ARM

@@ -82,6 +86,13 @@ struct InnerContext : public Context {

   void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);

+#ifdef SERVER_INFERENCE
+  /// \brief Set NUMA node id.
+  ///
+  /// \param[in] node_id Define the NUMA node id.
+  inline void SetNodeId(int node_id) { node_id_ = node_id; }
+#endif
+
  private:
   bool IsAllDeviceTypeValid() const;

@@ -99,6 +110,10 @@ struct InnerContext : public Context {

   bool device_and_pkg_support_fp16_ = false;

+#ifdef SERVER_INFERENCE
+  int node_id_ = -1;
+#endif
+
   ThreadPool *thread_pool_{nullptr};

   // key is the precursor tensor's pointer, value is the group of successors' pointer.

@@ -40,6 +40,9 @@
 #include "src/lite_model.h"
 #include "src/weight_decoder.h"
 #include "src/runtime/runtime_allocator.h"
+#ifdef SERVER_INFERENCE
+#include "src/runtime/dynamic_mem_allocator.h"
+#endif
 #include "src/lite_kernel_util.h"
 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
 #include "src/registry/register_kernel_impl.h"

@@ -1078,6 +1081,7 @@ int LiteSession::Init(InnerContext *context) {
     is_running_.store(false);
     return ret;
   }
+
   is_running_.store(false);
   return RET_OK;
 }

@@ -13,18 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifdef MSLITE_ENABLE_SERVER_INFERENCE
-#include "src/runtime/dynamic_mem_allocator.h"
-#else
 #include "src/runtime/inner_allocator.h"
-#endif

 namespace mindspore {
-std::shared_ptr<Allocator> Allocator::Create() {
-#ifdef MSLITE_ENABLE_SERVER_INFERENCE
-  return std::make_shared<DynamicMemAllocator>();
-#else
-  return std::make_shared<DefaultAllocator>();
-#endif
-}
+std::shared_ptr<Allocator> Allocator::Create() { return std::make_shared<DefaultAllocator>(); }
 }  // namespace mindspore

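This simplification is the flip side of the InnerContext::Init change above: Allocator::Create() takes no node id, so a compile-time switch inside it could never pick a NUMA node. With the #ifdef moved into InnerContext::Init, the context can pass node_id_ straight to the DynamicMemAllocator constructor, and Create() goes back to unconditionally returning the DefaultAllocator.
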
@@ -15,40 +15,43 @@
  */
 #include "src/runtime/dynamic_mem_allocator.h"
 #include <memory>
 #include "src/runtime/dynamic_mem_manager.h"
 #include "src/common/utils.h"
 #include "src/common/log_adapter.h"

 namespace mindspore {
-void *DynamicMemAllocator::Malloc(size_t size) { return DynamicMemManager::GetInstance()->Malloc(size); }
+void *DynamicMemAllocator::Malloc(size_t size) { return mem_oper_->Malloc(size); }

-void DynamicMemAllocator::Free(void *ptr) { DynamicMemManager::GetInstance()->Free(ptr); }
+void DynamicMemAllocator::Free(void *ptr) { mem_oper_->Free(ptr); }

 int DynamicMemAllocator::RefCount(void *ptr) {
   if (ptr == nullptr) {
     return -1;
   }
-  return DynamicMemManager::GetInstance()->RefCount(ptr);
+  return mem_oper_->RefCount(ptr);
 }

 int DynamicMemAllocator::SetRefCount(void *ptr, int ref_count) {
   if (ptr == nullptr) {
     return -1;
   }
-  return DynamicMemManager::GetInstance()->SetRefCount(ptr, ref_count);
+  return mem_oper_->SetRefCount(ptr, ref_count);
 }

 int DynamicMemAllocator::IncRefCount(void *ptr, int ref_count) {
   if (ptr == nullptr) {
     return -1;
   }
-  return DynamicMemManager::GetInstance()->IncRefCount(ptr, ref_count);
+  return mem_oper_->IncRefCount(ptr, ref_count);
 }

 int DynamicMemAllocator::DecRefCount(void *ptr, int ref_count) {
   if (ptr == nullptr) {
     return -1;
   }
-  return DynamicMemManager::GetInstance()->DecRefCount(ptr, ref_count);
+  return mem_oper_->DecRefCount(ptr, ref_count);
 }

+DynamicMemAllocator::DynamicMemAllocator(int node_id) {
+  mem_oper_ = DynamicMemManager::GetInstance()->GetMemOperator(node_id);
+}
 }  // namespace mindspore

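Each allocator now fronts the MemOperator of a single node instead of one process-wide pool. A hypothetical usage sketch (in-tree header path assumed; this only builds inside the MindSpore Lite source tree):

#include <memory>
#include "src/runtime/dynamic_mem_allocator.h"

int main() {
  // Two allocators bound to different NUMA nodes no longer share a pool.
  auto alloc_node0 = std::make_shared<mindspore::DynamicMemAllocator>(0);
  auto alloc_node1 = std::make_shared<mindspore::DynamicMemAllocator>(1);
  void *p = alloc_node0->Malloc(1024);  // served from node 0's MemOperator
  alloc_node0->Free(p);
  (void)alloc_node1;
  return 0;
}
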
@@ -19,13 +19,15 @@

 #include <mutex>
 #include <map>
+#include <memory>
 #include <unordered_map>
 #include "include/api/allocator.h"
+#include "src/runtime/dynamic_mem_manager.h"

 namespace mindspore {
 class DynamicMemAllocator : public Allocator {
  public:
-  DynamicMemAllocator() = default;
+  explicit DynamicMemAllocator(int node_id);
   virtual ~DynamicMemAllocator() = default;
   void *Malloc(size_t size) override;
   void Free(void *ptr) override;

@@ -33,6 +35,9 @@ class DynamicMemAllocator : public Allocator {
   int SetRefCount(void *ptr, int ref_count) override;
   int IncRefCount(void *ptr, int ref_count) override;
   int DecRefCount(void *ptr, int ref_count) override;
+
+ private:
+  std::shared_ptr<MemOperator> mem_oper_;
 };
 }  // namespace mindspore

@@ -17,6 +17,12 @@
 #include "src/runtime/dynamic_mem_manager.h"
 #include "src/common/log_adapter.h"
 #include "src/common/utils.h"
+#include "src/common/common.h"
+#include "src/runtime/numa_adapter.h"
+
+using mindspore::numa::NUMAAdapter;
+using mindspore::numa::MemoryInfo;
+
 namespace mindspore {
 namespace {

@@ -24,51 +30,69 @@ namespace {
 static constexpr size_t kMemAlginSize = 64;

 // The minimum unit size (512M) of memory block used for dynamic extend.
-static constexpr size_t kAllocUnitSize = 536870912;
+static constexpr auto kAllocUnitSize = 536870912;

-static constexpr size_t kBlockSize = 1024;
+static constexpr auto kBlockSize = 2048;
 // invalid block index
-static constexpr int64_t kInvalidIndex = -1;
+static constexpr int kInvalidIndex = -1;
+// invalid numa node id
+static constexpr int kInvalidNodeId = -1;
+static constexpr int kInvalidRefCount = -1;
+static constexpr float kDefaultMemoryLeastRatio = 0.1;

 size_t Rounded(size_t size) { return (size + kMemAlginSize - 1) & (~(kMemAlginSize - 1)); }
+}  // namespace

-void *Allocate(size_t allocate_size) {
-  if (allocate_size > lite::GetMaxMallocSize()) {
-    MS_LOG(ERROR) << "MallocData out of max_size, size: " << allocate_size;
+void *MemOperator::Allocate(size_t rounded_size, int node_id, size_t *allocate_size) {
+  int64_t allocate_tmp_size = static_cast<int64_t>(rounded_size < kAllocUnitSize ? kAllocUnitSize : rounded_size);
+  int64_t free_count = 0;
+  int64_t left = 0;
+  if (node_id >= 0) {
+    // allocate memory from numa node
+    MemoryInfo mem_info = NUMAAdapter::GetInstance()->GetNodeSize(node_id);
+    free_count = mem_info.free;
+  } else {
+    free_count = lite::GetFreeMemory();
+  }
+
+  if (UNLIKELY(static_cast<int64_t>(rounded_size) >= free_count)) {
+    MS_LOG(ERROR) << "No enough memory left!node_id: " << node_id << ", request: " << rounded_size
+                  << ", free: " << free_count << ", least free request: " << least_free_memory_;
     return nullptr;
   }
+  if (free_count < allocate_tmp_size) {
+    allocate_tmp_size = rounded_size;
+  }
+  left = free_count - allocate_tmp_size;
+  if (left <= least_free_memory_) {
+    MS_LOG(ERROR) << "No enough memory left!node_id: " << node_id << ", request: " << rounded_size
+                  << ", free: " << free_count << ", least free request: " << least_free_memory_;
+    return nullptr;
+  }
+  *allocate_size = allocate_tmp_size;
   void *data = nullptr;
 #ifdef _WIN32
-  data = _aligned_malloc(allocate_size, kMemAlginSize);
+  data = _aligned_malloc(allocate_tmp_size, kMemAlginSize);
 #else
-  auto ret = posix_memalign(&data, kMemAlginSize, allocate_size);
-  if (UNLIKELY(ret != 0)) {
-    MS_LOG(ERROR) << "posix_memalign failed!ret: " << ret;
-    return nullptr;
+  if (node_id >= 0) {
+    data = NUMAAdapter::GetInstance()->Malloc(node_id, static_cast<size_t>(allocate_tmp_size));
+  } else {
+    auto ret = posix_memalign(&data, kMemAlginSize, static_cast<size_t>(allocate_tmp_size));
+    if (UNLIKELY(ret != 0)) {
+      MS_LOG(ERROR) << "posix_memalign failed!ret: " << ret;
+      return nullptr;
+    }
   }
 #endif
   if (UNLIKELY(data == nullptr)) {
     MS_LOG(ERROR) << "malloc data failed!";
     return nullptr;
   }

   return data;
 }
-}  // namespace

-DynamicMemManager::DynamicMemManager() {
-  blocks_.resize(kBlockSize);
-  garbage_block_ = kInvalidIndex;
-  auto *block = GetBlock();
-  block->data_ = Allocate(kAllocUnitSize);
-  if (UNLIKELY(block->data_ == nullptr)) {
-    return;
-  }
-  all_datas_.emplace_back(block->data_);
-  block->size_ = kAllocUnitSize;
-  free_blocks_.emplace(kAllocUnitSize, block->index_);
-}

-Block *DynamicMemManager::GetBlock() {
+Block *MemOperator::GetBlock() {
   Block *block;
   if (garbage_block_ != kInvalidIndex) {
     block = &blocks_[garbage_block_];

@@ -87,24 +111,25 @@ Block *DynamicMemManager::GetBlock() {
   return block;
 }

-void DynamicMemManager::AddGarbageBlock(const int64_t index) {
+void MemOperator::AddGarbageBlock(const int64_t index) {
   blocks_[index].next_index_ = garbage_block_;
   garbage_block_ = index;
 }

 // malloc memory for data storage
-void *DynamicMemManager::Malloc(size_t size) {
+void *MemOperator::Malloc(size_t size) {
   auto rounded_size = Rounded(size);
   std::lock_guard<std::mutex> locker(mutex_);
   auto iter = free_blocks_.lower_bound(rounded_size);
   if (iter != free_blocks_.end()) {
     auto index = iter->second;
     free_blocks_.erase(iter);
-    auto *block = &blocks_[index];
-    block->used_ = true;
-    datas_.emplace(block->data_, index);
-    if (block->size_ > rounded_size) {
+    blocks_[index].used_ = true;
+    auto data = blocks_[index].data_;
+    datas_.emplace(data, index);
+    if (blocks_[index].size_ > rounded_size) {
       Block *block_next = GetBlock();
+      auto *block = &blocks_[index];
       block_next->size_ = block->size_ - rounded_size;
       block->size_ = rounded_size;
       block_next->data_ = static_cast<int8_t *>(block->data_) + rounded_size;

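The re-indexing above looks deliberate rather than cosmetic: GetBlock() can grow the blocks_ vector, and a reallocation would invalidate any Block pointer taken beforehand, so the patch reads blocks_[index] fresh and only caches a pointer after GetBlock() has run. A standalone illustration of the hazard:

#include <vector>

int main() {
  std::vector<int> v(2, 7);
  int *p = &v[0];
  v.resize(v.capacity() + 1);  // may reallocate; p now dangles
  (void)p;                     // dereferencing p here would be undefined
  return v[0];                 // safe pattern: index again instead
}
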
@@ -117,15 +142,15 @@ void *DynamicMemManager::Malloc(size_t size) {
       block->next_index_ = block_next->index_;
       free_blocks_.emplace(block_next->size_, block_next->index_);
     }
-    return block->data_;
+    return data;
   }
   // todo kAllocUnitSize can be replaced by config
-  auto allocate_size = rounded_size < kAllocUnitSize ? kAllocUnitSize : rounded_size;
-  void *data = Allocate(allocate_size);
+  size_t allocate_size;
+  void *data = Allocate(rounded_size, node_id_, &allocate_size);
   if (UNLIKELY(data == nullptr)) {
     return nullptr;
   }
-  all_datas_.emplace_back(data);
+  all_datas_.emplace(data, allocate_size);
   Block *block = GetBlock();
   block->size_ = rounded_size;
   block->data_ = data;

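The lookup at the top of Malloc is a best-fit search: free_blocks_ is a std::multimap keyed by block size, so lower_bound(rounded_size) lands on the smallest free block that still fits, in O(log n); an oversized hit is then split and the remainder re-filed. A standalone sketch of that core:

#include <cstdint>
#include <iostream>
#include <map>

int main() {
  // size -> block index, mirroring free_blocks_ in the patch.
  std::multimap<size_t, int64_t> free_blocks = {{64, 0}, {256, 1}, {4096, 2}};
  size_t request = 200;
  auto it = free_blocks.lower_bound(request);  // smallest block >= 200
  if (it != free_blocks.end()) {
    std::cout << "use block " << it->second << " of size " << it->first << "\n";
    free_blocks.erase(it);  // the 56-byte remainder would be re-inserted
  }
  return 0;
}
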
@@ -143,7 +168,7 @@ void *DynamicMemManager::Malloc(size_t size) {
 }

 // return memory to the memory pool
-void DynamicMemManager::Free(void *ptr) {
+void MemOperator::Free(void *ptr) {
   if (UNLIKELY(ptr == nullptr)) {
     return;
   }

@@ -194,7 +219,7 @@ void DynamicMemManager::Free(void *ptr) {
   }
 }

-void DynamicMemManager::EraseFreeBlock(const int64_t index) {
+void MemOperator::EraseFreeBlock(const int64_t index) {
   auto range = free_blocks_.equal_range(blocks_[index].size_);
   for (auto item = range.first; item != range.second; ++item) {
     if (item->second == index) {

@@ -204,23 +229,52 @@ void DynamicMemManager::EraseFreeBlock(const int64_t index) {
   }
 }

-DynamicMemManager::~DynamicMemManager() {
-  MS_LOG(DEBUG) << "~DynamicMemManager() begin.";
+MemOperator::MemOperator(int node_id) {
+  if (node_id >= 0 && NUMAAdapter::GetInstance()->Available()) {
+    node_id_ = node_id;
+    auto mem_info = NUMAAdapter::GetInstance()->GetNodeSize(node_id_);
+    if (mem_info.total <= 0) {
+      return;
+    }
+    least_free_memory_ = mem_info.total * kDefaultMemoryLeastRatio;
+  } else {
+    auto total = lite::GetMaxMallocSize();
+    least_free_memory_ = total * kDefaultMemoryLeastRatio;
+  }
+
+  blocks_.resize(kBlockSize);
+  garbage_block_ = kInvalidIndex;
+  auto *block = GetBlock();
+  size_t allocate_size;
+  block->data_ = Allocate(kAllocUnitSize, node_id, &allocate_size);
+  if (UNLIKELY(block->data_ == nullptr)) {
+    return;
+  }
+  all_datas_.emplace(block->data_, allocate_size);
+  block->size_ = allocate_size;
+  free_blocks_.emplace(allocate_size, block->index_);
+}
+
+MemOperator::~MemOperator() {
+  MS_LOG(DEBUG) << "~MemOperator() begin.";
   for (auto &&data : all_datas_) {
 #ifdef _WIN32
-    _aligned_free(data);
+    _aligned_free(data.first);
 #else
-    free(data);
+    if (node_id_ >= 0) {
+      NUMAAdapter::GetInstance()->Free(data.first, data.second);
+    } else {
+      free(data.first);
+    }
 #endif
-    data = nullptr;
   }
   free_blocks_.clear();
   all_datas_.clear();
   blocks_.clear();
-  MS_LOG(DEBUG) << "~DynamicMemManager() end.";
+  MS_LOG(DEBUG) << "~MemOperator() end.";
 }

-int DynamicMemManager::SetRefCount(void *ptr, int ref_count) {
+int MemOperator::SetRefCount(void *ptr, int ref_count) {
   std::lock_guard<std::mutex> locker(mutex_);
   auto iter = datas_.find(ptr);
   if (iter != datas_.end()) {

@@ -228,10 +282,10 @@ int DynamicMemManager::SetRefCount(void *ptr, int ref_count) {
     blocks_[index].ref_count_ = ref_count;
     return ref_count;
   }
-  return -1;
+  return kInvalidRefCount;
 }

-int DynamicMemManager::IncRefCount(void *ptr, int ref_count) {
+int MemOperator::IncRefCount(void *ptr, int ref_count) {
   std::lock_guard<std::mutex> locker(mutex_);
   auto iter = datas_.find(ptr);
   if (iter != datas_.end()) {

@@ -239,10 +293,10 @@ int DynamicMemManager::IncRefCount(void *ptr, int ref_count) {
     blocks_[index].ref_count_ += ref_count;
     return blocks_[index].ref_count_;
   }
-  return -1;
+  return kInvalidRefCount;
 }

-int DynamicMemManager::DecRefCount(void *ptr, int ref_count) {
+int MemOperator::DecRefCount(void *ptr, int ref_count) {
   std::lock_guard<std::mutex> locker(mutex_);
   auto iter = datas_.find(ptr);
   if (iter != datas_.end()) {

@@ -250,15 +304,39 @@ int DynamicMemManager::DecRefCount(void *ptr, int ref_count) {
     blocks_[index].ref_count_ -= ref_count;
     return blocks_[index].ref_count_;
   }
-  return -1;
+  return kInvalidRefCount;
 }

-int DynamicMemManager::RefCount(void *ptr) {
+int MemOperator::RefCount(void *ptr) {
   std::lock_guard<std::mutex> locker(mutex_);
   auto iter = datas_.find(ptr);
   if (iter != datas_.end()) {
     return blocks_[iter->second].ref_count_;
   }
-  return -1;
+  return kInvalidRefCount;
 }
+
+std::shared_ptr<MemOperator> DynamicMemManager::GetMemOperator(const int node_id) {
+  std::map<int, std::shared_ptr<MemOperator>>::iterator iter;
+  int numa_node_id = node_id;
+  if (numa_node_id < 0) {
+    numa_node_id = kInvalidNodeId;
+  }
+
+  std::lock_guard<std::mutex> locker(mutex_);
+  std::shared_ptr<MemOperator> mem_oper = nullptr;
+  iter = nodes_mem_.find(numa_node_id);
+  if (iter == nodes_mem_.end()) {
+    mem_oper = std::make_shared<MemOperator>(numa_node_id);
+    if (UNLIKELY(mem_oper == nullptr)) {
+      MS_LOG(ERROR) << "make_shared MemOperator failed!";
+      return nullptr;
+    }
+    std::cout << "new mem_oper, node_id " << numa_node_id << "\n";
+    nodes_mem_.insert({numa_node_id, mem_oper});
+  } else {
+    mem_oper = iter->second;
+  }
+  return mem_oper;
+}
 }  // namespace mindspore

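DynamicMemManager is reduced to a registry of per-node MemOperator instances, with node id -1 (kInvalidNodeId) keying the plain non-NUMA pool. (The std::cout trace in the creation path reads like leftover debugging; the surrounding code logs through MS_LOG.) A usage sketch under the same in-tree assumption as above:

#include "src/runtime/dynamic_mem_manager.h"

int main() {
  // Fetch, or lazily create, the memory operator for NUMA node 0.
  auto op = mindspore::DynamicMemManager::GetInstance()->GetMemOperator(0);
  if (op != nullptr) {
    void *buf = op->Malloc(4096);
    op->Free(buf);
  }
  return 0;
}
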
@@ -24,8 +24,6 @@
 #include <unordered_map>
 #include <deque>

-#define UNLIKELY(x) __builtin_expect(!!(x), 0)
-
 namespace mindspore {
 struct Block {
   // used_ may be true when ref_count_ == 0

@@ -38,14 +36,10 @@ struct Block {
   int64_t next_index_ = -1;
 };

-class DynamicMemManager {
+class MemOperator {
  public:
-  static DynamicMemManager *GetInstance() {
-    static DynamicMemManager instance;
-    return &instance;
-  }
-
-  virtual ~DynamicMemManager();
+  explicit MemOperator(int node_id);
+  virtual ~MemOperator();

   void *Malloc(size_t size);
   void Free(void *ptr);

@@ -53,14 +47,18 @@ class DynamicMemManager {
   int IncRefCount(void *ptr, int ref_count);
   int DecRefCount(void *ptr, int ref_count);
   int RefCount(void *ptr);
+  inline void set_node_id(int node_id) { node_id_ = node_id; }
+  inline int node_id(void) const { return node_id_; }

  private:
-  DynamicMemManager();
   Block *GetBlock();
   void EraseFreeBlock(const int64_t index);
   void AddGarbageBlock(const int64_t index);
+  void *Allocate(size_t rounded_size, int node_id, size_t *allocate_size);

  private:
+  int node_id_ = -1;
+  int64_t least_free_memory_ = 0;
   // all data blocks
   size_t block_count_ = 0;
   int64_t garbage_block_;

@@ -70,7 +68,21 @@ class DynamicMemManager {
   std::multimap<size_t, int64_t> free_blocks_;
   // key: data addr, value: Block index
   std::unordered_map<void *, int64_t> datas_;
-  std::vector<void *> all_datas_;
+  std::unordered_map<void *, size_t> all_datas_;
 };
+
+class DynamicMemManager {
+ public:
+  static DynamicMemManager *GetInstance() {
+    static DynamicMemManager instance;
+    return &instance;
+  }
+
+  std::shared_ptr<MemOperator> GetMemOperator(const int node_id);
+
+ private:
+  std::map<int, std::shared_ptr<MemOperator>> nodes_mem_;
+  std::mutex mutex_;
+};
 }  // namespace mindspore

@@ -0,0 +1,224 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/numa_adapter.h"
+#include <dlfcn.h>
+#include "src/common/log_adapter.h"
+#include "src/common/common.h"
+
+namespace mindspore {
+namespace numa {
+namespace {
+static constexpr int kSuccess = 0;
+static constexpr int kBitsPerByte = 8;
+}  // namespace
+
+NUMAAdapter::NUMAAdapter() {
+  available_ = false;
+  handle_ = dlopen("libnuma.so.1.0.0", RTLD_LAZY | RTLD_LOCAL);
+  if (handle_ == nullptr) {
+    MS_LOG(WARNING) << "Does not support NUMA.";
+    return;
+  }
+
+  numa_interfaces_.numa_available = reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_available"));
+  if (UNLIKELY(numa_interfaces_.numa_available == nullptr)) {
+    MS_LOG(ERROR) << "numa_available not found!";
+  }
+  if (numa_interfaces_.numa_available() < 0) {
+    MS_LOG(ERROR) << "numa is not available!";
+    (void)dlclose(handle_);
+    handle_ = nullptr;
+    return;
+  }
+  available_ = true;
+  numa_interfaces_.numa_num_configured_nodes =
+    reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_num_configured_nodes"));
+  if (UNLIKELY(numa_interfaces_.numa_num_configured_nodes == nullptr)) {
+    MS_LOG(ERROR) << "numa_num_configured_nodes not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_num_task_cpus = reinterpret_cast<int (*)(void)>(dlsym(handle_, "numa_num_task_cpus"));
+  if (UNLIKELY(numa_interfaces_.numa_num_task_cpus == nullptr)) {
+    MS_LOG(ERROR) << "numa_num_task_cpus not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_node_to_cpus =
+    reinterpret_cast<int (*)(int node, struct bitmask *mask)>(dlsym(handle_, "numa_node_to_cpus"));
+  if (UNLIKELY(numa_interfaces_.numa_node_to_cpus == nullptr)) {
+    MS_LOG(ERROR) << "numa_node_to_cpus not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_allocate_nodemask =
+    reinterpret_cast<struct bitmask *(*)(void)>(dlsym(handle_, "numa_allocate_nodemask"));
+  if (UNLIKELY(numa_interfaces_.numa_allocate_nodemask == nullptr)) {
+    MS_LOG(ERROR) << "numa_allocate_nodemask not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_bitmask_clearall =
+    reinterpret_cast<struct bitmask *(*)(struct bitmask *)>(dlsym(handle_, "numa_bitmask_clearall"));
+  if (UNLIKELY(numa_interfaces_.numa_bitmask_clearall == nullptr)) {
+    MS_LOG(ERROR) << "numa_bitmask_clearall not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_bitmask_setbit =
+    reinterpret_cast<struct bitmask *(*)(struct bitmask *, unsigned int)>(dlsym(handle_, "numa_bitmask_setbit"));
+  if (UNLIKELY(numa_interfaces_.numa_bitmask_setbit == nullptr)) {
+    MS_LOG(ERROR) << "numa_bitmask_setbit not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_bind = reinterpret_cast<void (*)(struct bitmask *)>(dlsym(handle_, "numa_bind"));
+  if (UNLIKELY(numa_interfaces_.numa_bind == nullptr)) {
+    MS_LOG(ERROR) << "numa_bind not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_bitmask_free =
+    reinterpret_cast<void (*)(struct bitmask *)>(dlsym(handle_, "numa_bitmask_free"));
+  if (UNLIKELY(numa_interfaces_.numa_bitmask_free == nullptr)) {
+    MS_LOG(ERROR) << "numa_bitmask_free not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_alloc_onnode =
+    reinterpret_cast<void *(*)(size_t size, int node)>(dlsym(handle_, "numa_alloc_onnode"));
+  if (UNLIKELY(numa_interfaces_.numa_alloc_onnode == nullptr)) {
+    MS_LOG(ERROR) << "numa_alloc_onnode not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_node_size64 =
+    reinterpret_cast<int64_t (*)(int node, int64_t *freep)>(dlsym(handle_, "numa_node_size64"));
+  if (UNLIKELY(numa_interfaces_.numa_node_size64 == nullptr)) {
+    MS_LOG(ERROR) << "numa_node_size64 not found!";
+    available_ = false;
+  }
+  numa_interfaces_.numa_free = reinterpret_cast<void (*)(void *start, size_t size)>(dlsym(handle_, "numa_free"));
+  if (UNLIKELY(numa_interfaces_.numa_free == nullptr)) {
+    MS_LOG(ERROR) << "numa_free not found!";
+    available_ = false;
+  }
+  if (!available_) {
+    (void)dlclose(handle_);
+    handle_ = nullptr;
+    return;
+  }
+}
+
+void NUMAAdapter::Bind(int node_id) {
+  if (!Available() || node_id < 0) {
+    return;
+  }
+  auto bitmask = numa_interfaces_.numa_allocate_nodemask();
+  if (UNLIKELY(bitmask == nullptr)) {
+    MS_LOG(ERROR) << "bind numa_node " << node_id << " failed!";
+    return;
+  }
+  (void)numa_interfaces_.numa_bitmask_setbit(bitmask, node_id);
+  numa_interfaces_.numa_bind(bitmask);
+  numa_interfaces_.numa_bitmask_free(bitmask);
+}
+
+void *NUMAAdapter::Malloc(int node_id, size_t size) {
+  if (!Available() || node_id < 0) {
+    return nullptr;
+  }
+  return numa_interfaces_.numa_alloc_onnode(size, node_id);
+}
+
+void NUMAAdapter::Free(void *data, size_t size) {
+  if (!Available() || data == nullptr) {
+    return;
+  }
+  numa_interfaces_.numa_free(data, size);
+}
+
+int NUMAAdapter::NodesNum() {
+  if (!Available()) {
+    return 0;
+  }
+  return numa_interfaces_.numa_num_configured_nodes();
+}
+
+int NUMAAdapter::CPUNum() {
+  if (!Available()) {
+    return 0;
+  }
+  return numa_interfaces_.numa_num_task_cpus();
+}
+
+std::vector<int> NUMAAdapter::GetCPUList(int node_id) {
+  std::vector<int> cpu_list;
+  if (!Available() || node_id < 0) {
+    return cpu_list;
+  }
+  struct bitmask *nodemask = numa_interfaces_.numa_allocate_nodemask();
+  if (nodemask == nullptr) {
+    MS_LOG(ERROR) << "allocate nodemask failed!";
+    return cpu_list;
+  }
+  auto ret = numa_interfaces_.numa_node_to_cpus(node_id, nodemask);
+  if (ret != kSuccess || nodemask->maskp == nullptr) {
+    MS_LOG(ERROR) << "numa_node_to_cpus failed!ret = " << ret;
+    return cpu_list;
+  }
+  int cpu_num = numa_interfaces_.numa_num_task_cpus();
+  if (UNLIKELY(cpu_num < 0)) {
+    MS_LOG(ERROR) << "numa_num_task_cpus return " << cpu_num;
+    return cpu_list;
+  }
+  int index = 0;
+  int maskp_index = 0;
+  auto maskp = nodemask->maskp;
+  do {
+    if (UNLIKELY(maskp == nullptr)) {
+      MS_LOG(ERROR) << "maskp is nullptr!";
+      break;
+    }
+    auto mask = *(maskp);
+    static constexpr auto kBitsPerMask = static_cast<int>(sizeof(decltype(mask)) * kBitsPerByte);
+    int step = static_cast<int>(maskp_index * kBitsPerMask);
+    for (int i = 0; i < kBitsPerMask; ++i) {
+      if (mask & 1) {
+        cpu_list.emplace_back(i + step);
+      }
+      mask >>= 1;
+    }
+    index += kBitsPerMask;
+    if (index >= cpu_num) {
+      break;
+    }
+    maskp = nodemask->maskp + 1;
+    ++maskp_index;
+  } while (true);
+
+  numa_interfaces_.numa_bitmask_free(nodemask);
+  return cpu_list;
+}
+
+MemoryInfo NUMAAdapter::GetNodeSize(int node_id) {
+  MemoryInfo mem_info;
+  if (!Available() || node_id < 0) {
+    return mem_info;
+  }
+  mem_info.total = numa_interfaces_.numa_node_size64(node_id, &mem_info.free);
+  return mem_info;
+}
+
+NUMAAdapter::~NUMAAdapter() {
+  if (handle_ == nullptr) {
+    return;
+  }
+  (void)dlclose(handle_);
+}
+}  // namespace numa
+}  // namespace mindspore

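numa_adapter.cc binds libnuma entirely through dlopen/dlsym, so MindSpore Lite takes no link-time dependency on it and degrades to the non-NUMA path wherever the library is missing. A minimal standalone sketch of the same optional-dependency pattern (link with -ldl on older glibc):

#include <dlfcn.h>
#include <cstdio>

int main() {
  // Resolve one libnuma symbol at runtime; fall back cleanly if absent.
  void *handle = dlopen("libnuma.so.1.0.0", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr) {
    std::printf("NUMA not supported on this system\n");
    return 0;
  }
  auto numa_available = reinterpret_cast<int (*)(void)>(dlsym(handle, "numa_available"));
  if (numa_available != nullptr && numa_available() >= 0) {
    std::printf("NUMA available\n");
  }
  dlclose(handle);
  return 0;
}
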
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_
+#include <numa.h>
+#include <cstddef>
+#include <vector>
+
+namespace mindspore {
+namespace numa {
+struct NUMAInterface {
+  int (*numa_available)(void);
+  int (*numa_num_configured_nodes)(void);
+  int (*numa_num_task_cpus)();
+  int (*numa_node_to_cpus)(int node, struct bitmask *mask);
+  struct bitmask *(*numa_allocate_nodemask)(void);
+  struct bitmask *(*numa_bitmask_clearall)(struct bitmask *);
+  struct bitmask *(*numa_bitmask_setbit)(struct bitmask *, unsigned int);
+  void (*numa_bind)(struct bitmask *);
+  void (*numa_bitmask_free)(struct bitmask *);
+  void *(*numa_alloc_onnode)(size_t size, int node);
+  int64_t (*numa_node_size64)(int node, int64_t *freep);
+  void (*numa_free)(void *start, size_t size);
+};
+
+struct MemoryInfo {
+  int64_t total = 0;
+  int64_t free = 0;
+};
+
+class NUMAAdapter {
+ public:
+  static NUMAAdapter *GetInstance() {
+    static NUMAAdapter instance;
+    return &instance;
+  }
+
+  virtual ~NUMAAdapter();
+  inline bool Available() const { return false; }
+  void Bind(int node_id);
+  void *Malloc(int node_id, size_t size);
+  void Free(void *data, size_t size);
+  int NodesNum();
+  int CPUNum();
+  std::vector<int> GetCPUList(int node_id);
+  MemoryInfo GetNodeSize(int node_id);
+
+ private:
+  NUMAAdapter();
+
+  void *handle_;  // numa.so handle
+  bool available_ = false;
+  NUMAInterface numa_interfaces_;
+};
+}  // namespace numa
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_NUMA_ADAPTER_H_

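One detail worth flagging in this header: Available() is hard-coded to return false even though the constructor carefully maintains available_, which as written leaves every NUMA branch in the callers above dead. Presumably the accessor was meant to read the flag; a one-line sketch of that assumed intent (not what the diff contains):

inline bool Available() const { return available_; }
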
@@ -131,6 +131,7 @@ set(LITE_SRC
         ${LITE_SRC}
         ${SRC_DIR}/runtime/dynamic_mem_allocator.cc
         ${SRC_DIR}/runtime/dynamic_mem_manager.cc
+        ${SRC_DIR}/runtime/numa_adapter.cc
         )
 endif()