!26104 core bind for windows

Merge pull request !26104 from ling/pr
This commit is contained in:
i-robot 2021-11-11 02:32:55 +00:00 committed by Gitee
commit e6180c19cc
5 changed files with 121 additions and 61 deletions

View File

@ -145,9 +145,9 @@ int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_nu
return THREAD_ERROR;
}
#endif
#ifdef BIND_CORE
affinity_->SetCoreId(core_list);
#endif
if (affinity_ != nullptr) {
affinity_->SetCoreId(core_list);
}
size_t core_num = std::thread::hardware_concurrency();
THREAD_INFO("ThreadInfo, Actor: [%zu], All: [%zu], CoreNum: [%zu]", actor_thread_num, all_thread_num, core_num);
actor_thread_num_ = actor_thread_num < core_num ? actor_thread_num : core_num;
@ -159,14 +159,7 @@ int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_nu
std::lock_guard<std::mutex> _l(pool_mutex_);
auto worker = new (std::nothrow) ActorWorker();
THREAD_ERROR_IF_NULL(worker);
#ifdef BIND_CORE
cpu_set_t mask;
CPU_ZERO(&mask);
if (core_list.size() > 0) {
CPU_SET(core_list[workers_.size() % core_list.size()], &mask);
}
worker->set_mask(mask);
#endif
worker->InitWorkerMask(core_list, workers_.size());
worker->CreateThread(this);
workers_.push_back(worker);
THREAD_INFO("create actor thread[%zu]", i);

View File

@ -25,8 +25,15 @@
#include <mach/machine.h>
#endif // MS_COMPILE_IOS
#include "thread/threadpool.h"
#ifdef _WIN32
#include <windows.h>
#endif
namespace mindspore {
#ifdef _WIN32
std::vector<DWORD_PTR> WindowsCoreList;
#endif
enum Arch {
UnKnown_Arch = 0,
Cortex_A5,
@ -106,20 +113,20 @@ enum Arch GetArch(int cpu_part) {
int ParseCpuPart(const char *line, int start, int size) {
int cpu_part = 0;
for (int i = start; i < size && i < start + 3; i++) {
for (int i = start; i < size && i < start + PARSE_CPU_GAP; i++) {
char c = line[i];
int d;
if (c >= '0' && c <= '9') {
d = c - '0';
} else if ((c - 'A') < 6) {
d = 10 + (c - 'A');
} else if ((c - 'a') < 6) {
d = 10 + (c - 'a');
} else if ((c - 'A') < (PARSE_CPU_HEX - PARSE_CPU_DEC)) {
d = PARSE_CPU_DEC + (c - 'A');
} else if ((c - 'a') < (PARSE_CPU_HEX - PARSE_CPU_DEC)) {
d = PARSE_CPU_DEC + (c - 'a');
} else {
THREAD_ERROR("CPU part in /proc/cpuinfo is ignored due to unexpected non-hex character");
break;
}
cpu_part = cpu_part * 16 + d;
cpu_part = cpu_part * PARSE_CPU_HEX + d;
}
return cpu_part;
}
@ -208,8 +215,31 @@ int GetMaxFrequency(int core_id) {
return max_freq;
}
#ifdef _WIN32
void SetWindowsAffinity(HANDLE thread, DWORD_PTR mask) {
THREAD_INFO("Bind thread[%ld] to core[%lld].", GetThreadId(thread), mask);
SetThreadAffinityMask(thread, mask);
return;
}
void SetWindowsSelfAffinity(uint64_t core_id) {
if (WindowsCoreList.size() <= core_id) {
return;
}
DWORD_PTR mask = WindowsCoreList[core_id];
SetWindowsAffinity(GetCurrentThread(), mask);
return;
}
#endif
int CoreAffinity::InitHardwareCoreInfo() {
core_num_ = std::thread::hardware_concurrency();
#ifdef _WIN32
WindowsCoreList.resize(core_num_);
for (size_t i = 0; i < core_num_; i++) {
WindowsCoreList[i] = 1 << i;
}
#endif
std::vector<CpuInfo> freq_set;
freq_set.resize(core_num_);
core_freq_.resize(core_num_);
@ -248,7 +278,9 @@ int CoreAffinity::InitHardwareCoreInfo() {
std::vector<int> CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) {
std::vector<int> bind_id;
#ifdef BIND_CORE
#ifdef _WIN32
return bind_id;
#elif defined(BIND_CORE)
if (core_num_ != sorted_id_.size()) {
THREAD_ERROR("init sorted core id failed");
return bind_id;
@ -270,16 +302,20 @@ std::vector<int> CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode)
void CoreAffinity::SetCoreId(const std::vector<int> &core_list) { bind_id_ = core_list; }
int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) {
#ifndef _WIN32
bind_id_.clear();
bind_id_ = GetCoreId(thread_num, bind_mode);
if (bind_id_.empty()) {
return THREAD_ERROR;
}
#endif
return THREAD_OK;
}
#ifdef BIND_CORE
int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) const {
#ifdef _WIN32
int CoreAffinity::SetAffinity() { return THREAD_OK; }
#elif defined(BIND_CORE)
int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) {
#ifdef __ANDROID__
#if __ANDROID_API__ >= 21
THREAD_INFO("thread: %d, mask: %lu", pthread_gettid_np(thread_id), cpu_set->__bits[0]);
@ -303,10 +339,12 @@ int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) co
#endif
return THREAD_OK;
}
#endif // BIND_CORE
#endif
int CoreAffinity::FreeScheduleThreads(const std::vector<Worker *> &workers) const {
#ifdef BIND_CORE
int CoreAffinity::FreeScheduleThreads(const std::vector<Worker *> &workers) {
#ifdef _WIN32
return THREAD_OK;
#elif defined(BIND_CORE)
cpu_set_t mask;
CPU_ZERO(&mask);
for (int i : bind_id_) {
@ -322,8 +360,10 @@ int CoreAffinity::FreeScheduleThreads(const std::vector<Worker *> &workers) cons
return THREAD_OK;
}
int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) const {
#ifdef BIND_CORE
int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) {
#ifdef _WIN32
return THREAD_OK;
#elif defined(BIND_CORE)
if (bind_id_.empty()) {
THREAD_ERROR("bind id is empty");
return THREAD_ERROR;
@ -346,8 +386,10 @@ int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) co
return THREAD_OK;
}
int CoreAffinity::BindProcess(BindMode bind_mode) const {
#ifdef BIND_CORE
int CoreAffinity::BindProcess(BindMode bind_mode) {
#ifdef _WIN32
return THREAD_OK;
#elif defined(BIND_CORE)
if (bind_id_.empty()) {
// initializes bind id before bind currently process
THREAD_ERROR("bind id is empty");

View File

@ -24,6 +24,9 @@
#define BIND_CORE
#include <sched.h>
#endif
#ifdef _WIN32
#define BIND_CORE
#endif
namespace mindspore {
enum BindMode {
@ -31,6 +34,13 @@ enum BindMode {
Power_Higher = 1,
Power_Middle = 2,
};
#define PARSE_CPU_GAP 3
#define PARSE_CPU_DEC 10
#define PARSE_CPU_HEX 16
#ifdef _WIN32
void SetWindowsSelfAffinity(uint64_t core_id);
#endif
class Worker;
class CoreAffinity {
@ -42,19 +52,21 @@ class CoreAffinity {
int BindThreads(const std::vector<Worker *> &workers, const std::vector<int> &core_list);
int BindThreads(const std::vector<Worker *> &workers, BindMode bind_mode);
int BindProcess(BindMode bind_mode) const;
int BindProcess(BindMode bind_mode);
std::vector<int> GetCoreId(size_t thread_num, BindMode bind_mode);
void SetCoreId(const std::vector<int> &core_list);
private:
#ifdef BIND_CORE
int SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) const;
#endif // BIND_CORE
#ifdef _WIN32
int SetAffinity();
#elif defined(BIND_CORE)
int SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set);
#endif
int InitBindCoreId(size_t thread_num, BindMode bind_mode);
int BindThreadsToCoreList(const std::vector<Worker *> &workers) const;
int FreeScheduleThreads(const std::vector<Worker *> &workers) const;
int BindThreadsToCoreList(const std::vector<Worker *> &workers);
int FreeScheduleThreads(const std::vector<Worker *> &workers);
// bind_id contains the CPU cores to bind
// the size of bind_id is equal to the size of workers

View File

@ -35,7 +35,9 @@ Worker::~Worker() {
void Worker::CreateThread() { thread_ = std::thread(&Worker::Run, this); }
void Worker::SetAffinity() {
#ifdef BIND_CORE
#ifdef _WIN32
SetWindowsSelfAffinity(core_id_);
#elif defined(BIND_CORE)
#ifdef __ANDROID__
int ret = sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask_);
if (ret != THREAD_OK) {
@ -54,6 +56,21 @@ void Worker::SetAffinity() {
#endif
}
void Worker::InitWorkerMask(const std::vector<int> &core_list, size_t workers_size) {
#ifdef _WIN32
static uint32_t windows_core_index = 0;
core_id_ = windows_core_index++;
#elif defined(BIND_CORE)
cpu_set_t mask;
CPU_ZERO(&mask);
if (core_list.size() > 0) {
CPU_SET(core_list[workers_size % core_list.size()], &mask);
}
this->set_mask(mask);
#endif
return;
}
void Worker::Run() {
SetAffinity();
#if !defined(__APPLE__) && !defined(SUPPORT_MSVC)
@ -135,8 +152,11 @@ ThreadPool::~ThreadPool() {
worker = nullptr;
}
workers_.clear();
delete affinity_;
affinity_ = nullptr;
if (affinity_ != nullptr) {
delete affinity_;
affinity_ = nullptr;
}
THREAD_INFO("destruct success");
}
@ -152,14 +172,7 @@ int ThreadPool::CreateThreads(size_t thread_num, const std::vector<int> &core_li
for (size_t i = 0; i < thread_num; ++i) {
auto worker = new (std::nothrow) Worker();
THREAD_ERROR_IF_NULL(worker);
#ifdef BIND_CORE
cpu_set_t mask;
CPU_ZERO(&mask);
if (core_list.size() > 0) {
CPU_SET(core_list[workers_.size() % core_list.size()], &mask);
}
worker->set_mask(mask);
#endif
worker->InitWorkerMask(core_list, workers_.size());
worker->CreateThread();
workers_.push_back(worker);
THREAD_INFO("create kernel thread[%zu]", i);
@ -308,33 +321,27 @@ int ThreadPool::SetCpuAffinity(BindMode bind_mode) {
if (workers_.empty()) {
return THREAD_ERROR;
}
#ifdef BIND_CORE
THREAD_ERROR_IF_NULL(affinity_);
return affinity_->BindThreads(workers_, bind_mode);
#else
if (affinity_ != nullptr) {
return affinity_->BindThreads(workers_, bind_mode);
}
return THREAD_OK;
#endif // BIND_CORE
}
int ThreadPool::SetCpuAffinity(const std::vector<int> &core_list) {
if (workers_.empty()) {
return THREAD_ERROR;
}
#ifdef BIND_CORE
THREAD_ERROR_IF_NULL(affinity_);
return affinity_->BindThreads(workers_, core_list);
#else
if (affinity_ != nullptr) {
return affinity_->BindThreads(workers_, core_list);
}
return THREAD_OK;
#endif // BIND_CORE
}
int ThreadPool::SetProcessAffinity(BindMode bind_mode) const {
#ifdef BIND_CORE
THREAD_ERROR_IF_NULL(affinity_);
return affinity_->BindProcess(bind_mode);
#else
if (affinity_ != nullptr) {
return affinity_->BindProcess(bind_mode);
}
return THREAD_OK;
#endif // BIND_CORE
}
void ThreadPool::SetSpinCountMaxValue() {

View File

@ -70,6 +70,7 @@ class Worker {
bool RunLocalKernelTask();
// set max spin count before running
void SetMaxSpinCount(int max_spin_count) { max_spin_count_ = max_spin_count; }
void InitWorkerMask(const std::vector<int> &core_list, size_t workers_size);
void set_frequency(int frequency) { frequency_ = frequency; }
int frequency() const { return frequency_; }
@ -79,7 +80,10 @@ class Worker {
float rhs_scale() const { return rhs_scale_; }
std::thread::id thread_id() const { return thread_.get_id(); }
#ifdef BIND_CORE
#ifdef _WIN32
uint64_t core_id() { return core_id_; }
#elif defined(BIND_CORE)
void set_mask(const cpu_set_t &mask) { mask_ = mask; }
pthread_t handle() { return thread_.native_handle(); }
#endif
@ -92,7 +96,9 @@ class Worker {
bool alive_{true};
std::thread thread_;
#ifdef BIND_CORE
#ifdef _WIN32
uint64_t core_id_;
#elif defined(BIND_CORE)
cpu_set_t mask_;
#endif
std::atomic_int status_{kThreadBusy};