forked from mindspore-Ecosystem/mindspore
Use a kernel pool to share compilation results when running on multiple
cards
This commit is contained in:
parent
899b89feba
commit
d293c5eb26
2
akg
2
akg
|
@ -1 +1 @@
|
|||
Subproject commit 4aac4d95750a87e664f175c0fa946a069f8a0c2a
|
||||
Subproject commit 15b59fb739944c1903558659a39b34bb632de448
|
|
@ -50,11 +50,6 @@ def _compile_akg_task_gpu(json_strs, attrs):
|
|||
if not res:
|
||||
raise ValueError("Compile error, args: {}! build attrs: {}".format(json_str, attrs))
|
||||
|
||||
pid_path = os.path.realpath("./cuda_meta_" + str(os.getpid()))
|
||||
if os.path.exists(pid_path):
|
||||
copy_json(pid_path, os.path.realpath("./cuda_meta_" + str(os.getppid())))
|
||||
shutil.rmtree(pid_path)
|
||||
|
||||
|
||||
def _compile_akg_task_ascend(json_strs, attrs):
|
||||
"""
|
||||
|
|
|
@ -16,6 +16,11 @@
|
|||
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
@ -23,6 +28,7 @@
|
|||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "ir/dtype.h"
|
||||
#include "ir/func_graph.h"
|
||||
|
@ -34,9 +40,334 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
// Accessors for the kernel-id lists reachable through `kernel_lists_`. Every list keeps its current
// element count in the extra slot at index `kMaxKernelNum_`, so the end of a list is begin + count.
// Both `kernel_lists_` and `kMaxKernelNum_` must be visible at the point of expansion.
#define LIST_BEGIN(list_idx) (kernel_lists_[(list_idx)])
#define LIST_END(list_idx) (LIST_BEGIN(list_idx) + LIST_BEGIN(list_idx)[kMaxKernelNum_])

// Declares a std::set<size_t> called `set_var` holding a snapshot of the ids currently in the list.
// The expansion already ends with ';', so a trailing ';' at the call site is just an empty statement.
#define INIT_SET_FROM_2D_ARRAY(set_var, list_idx) std::set<size_t> set_var(LIST_BEGIN(list_idx), LIST_END(list_idx));

// Overwrite the stored element count of a list.
#define RESET_LIST_SIZE(list_idx, val) LIST_BEGIN(list_idx)[kMaxKernelNum_] = (val)

// Grow the stored element count of a list by `val`.
#define INCREASE_LIST_SIZE(list_idx, val) LIST_BEGIN(list_idx)[kMaxKernelNum_] += (val)

// Arguments handed to the build client's AkgStart(); presumably the worker-process count and a
// timeout (unit not visible here) -- TODO confirm against the client implementation.
constexpr int32_t PROCESS_NUM = 16;
constexpr int32_t TIME_OUT = 300;
|
||||
|
||||
// Converts the decimal suffix of a kernel name into a number.
// The suffix is everything after the last '_' (the whole name when there is no '_').
// Each character is converted with std::stoi, so a non-digit character makes this function
// throw std::invalid_argument. An empty suffix yields 0. Arithmetic is done in size_t.
static inline size_t NameToHashID(const std::string &name) {
  // find_last_of returns npos when '_' is absent; npos + 1 wraps to 0, i.e. the whole name.
  const std::string digits = name.substr(name.find_last_of("_") + 1);

  size_t result = 0;
  size_t place_value = 1;

  // Walk from the least-significant digit (last character) to the most-significant one.
  for (auto it = digits.rbegin(); it != digits.rend(); ++it) {
    const int digit = std::stoi(std::string(1, *it));
    result += static_cast<size_t>(digit) * place_value;
    place_value *= 10;
  }

  return result;
}
|
||||
|
||||
bool AkgKernelPool::LockMng::TryLock() {
|
||||
// Try to lock 100 times. Return errno if lock unsuccessfully
|
||||
uint32_t trial = 100;
|
||||
|
||||
int32_t ret = -1;
|
||||
while (trial > 0) {
|
||||
ret = lockf(fd_, F_TLOCK, 0);
|
||||
if (ret == 0 || (errno != EACCES && errno != EAGAIN)) {
|
||||
break;
|
||||
}
|
||||
|
||||
trial--;
|
||||
usleep(5000);
|
||||
}
|
||||
|
||||
if (ret == -1) {
|
||||
MS_LOG(ERROR) << "Failed to acquire the lock, errno:" << strerror(errno) << ".";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void AkgKernelPool::LockMng::Unlock() {
|
||||
auto ret = lockf(fd_, F_ULOCK, 0);
|
||||
if (ret == -1) {
|
||||
MS_LOG(ERROR) << "Failed to release the lock, errno:" << strerror(errno);
|
||||
}
|
||||
}
|
||||
|
||||
std::string AkgKernelPool::GetCurrentPath() {
|
||||
char cwd[PATH_MAX];
|
||||
char *ret = getcwd(cwd, sizeof(cwd));
|
||||
if (ret == nullptr) {
|
||||
MS_LOG(ERROR) << "Get current work directory failed, errno:" << strerror(errno);
|
||||
return "";
|
||||
}
|
||||
|
||||
char abspath[PATH_MAX];
|
||||
char *res = realpath(cwd, abspath);
|
||||
if (res == nullptr) {
|
||||
MS_LOG(ERROR) << "Change to realpath failed, errno:" << strerror(errno);
|
||||
return "";
|
||||
}
|
||||
|
||||
return std::string(abspath);
|
||||
}
|
||||
|
||||
void *AkgKernelPool::CreateSharedMem(const std::string &path) {
|
||||
is_creator_ = false;
|
||||
|
||||
auto hash_id = std::hash<std::string>()(path);
|
||||
auto key_id = static_cast<key_t>(hash_id);
|
||||
auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512;
|
||||
|
||||
{
|
||||
LockMng lock(fd_);
|
||||
if (!lock.locked_) {
|
||||
MS_LOG(ERROR) << "Failed to acquire lock.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// check if the shared memory exists or not.
|
||||
// remove shared memory if exists and the nattach is 0
|
||||
struct shmid_ds buf;
|
||||
auto id = shmget(key_id, mem_size, 0);
|
||||
if (id != -1) {
|
||||
auto ret = shmctl(id, IPC_STAT, &buf);
|
||||
if (ret == -1) {
|
||||
MS_LOG(ERROR) << "Failed to get the info of shared memory, errno:" << strerror(errno);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (buf.shm_nattch == 0) {
|
||||
ret = shmctl(id, IPC_RMID, nullptr);
|
||||
if (ret < 0) {
|
||||
MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LockMng lock(fd_);
|
||||
if (!lock.locked_) {
|
||||
MS_LOG(ERROR) << "Failed to acquire lock.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
shm_id_ = shmget(key_id, mem_size, IPC_CREAT | IPC_EXCL | 0600);
|
||||
if (shm_id_ == -1) {
|
||||
if (errno == EEXIST) {
|
||||
shm_id_ = shmget(key_id, mem_size, 0);
|
||||
}
|
||||
|
||||
if (shm_id_ == -1) {
|
||||
MS_LOG(ERROR) << "Create shared_mem failed, error no:" << strerror(errno);
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
is_creator_ = true;
|
||||
}
|
||||
|
||||
auto local_addr = shmat(shm_id_, nullptr, 0);
|
||||
if (local_addr == reinterpret_cast<void *>(-1)) {
|
||||
MS_LOG(ERROR) << "Attach to shared_mem failed, error no:" << strerror(errno);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (is_creator_) {
|
||||
(void)memset(local_addr, 0, mem_size);
|
||||
}
|
||||
|
||||
return local_addr;
|
||||
}
|
||||
|
||||
int32_t AkgKernelPool::Init(const std::vector<JsonNodePair> &build_args) {
|
||||
auto cp = GetCurrentPath();
|
||||
if (cp.empty()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
fd_ = open(kKeyName_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
|
||||
if (fd_ == -1) {
|
||||
MS_LOG(ERROR) << "open file <" << kKeyName_ << "> failed, errno:" << strerror(errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto addr = CreateSharedMem(cp);
|
||||
if (addr == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
InitKernelLists(addr);
|
||||
|
||||
auto ret = AddKernels(build_args);
|
||||
if (ret != 0) {
|
||||
MS_LOG(ERROR) << "AkgKernelPool AddKernels failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Releases everything Init() may have acquired: the shared-memory attachment, the segment itself
// (only when this object created it) and the lock-file descriptor.
//
// Each step is guarded so that destroying a pool whose Init() failed, or was never called, is
// harmless. Failures are logged rather than raised: a destructor is implicitly noexcept, so
// raising from here would terminate the process and would also skip the remaining cleanup.
AkgKernelPool::~AkgKernelPool() {
  // Detach shared memory (kernel_lists_[0] is the base address returned by shmat, if any).
  if (kernel_lists_[0] != nullptr) {
    if (shmdt(reinterpret_cast<void *>(kernel_lists_[0])) < 0) {
      MS_LOG(ERROR) << "Shared_mem detach failed, errno:" << strerror(errno);
    }
  }

  // Release shared memory if this object created the segment.
  if (is_creator_ && shm_id_ != -1) {
    if (shmctl(shm_id_, IPC_RMID, nullptr) < 0) {
      MS_LOG(ERROR) << "Realse shared_mem failed, errno:" << strerror(errno);
    }
  }

  // Close key file
  if (fd_ != -1) {
    (void)close(fd_);
  }
}
|
||||
|
||||
int32_t AkgKernelPool::AddKernels(const std::vector<JsonNodePair> &build_args) {
|
||||
LockMng lock(fd_);
|
||||
if (!lock.locked_) {
|
||||
MS_LOG(ERROR) << "Failed to acquire lock.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
INIT_SET_FROM_2D_ARRAY(todo_list, kToDoIdx_);
|
||||
INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
|
||||
INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
|
||||
|
||||
for (const auto &[json_generator, anf_node] : build_args) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_name = json_generator.kernel_name();
|
||||
|
||||
auto hash_id = NameToHashID(kernel_name);
|
||||
if (self_kernel_ids_.count(hash_id) != 0) {
|
||||
MS_LOG(ERROR) << "Duplicated hash_id in list.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
self_kernel_ids_.emplace(hash_id);
|
||||
}
|
||||
|
||||
std::set<size_t> diff_from_todo;
|
||||
std::set<size_t> diff_from_doing;
|
||||
std::set<size_t> diff_from_done;
|
||||
|
||||
// add the unique kernel only once, so need to check if it exists in todo_list, doing_list, or done_list
|
||||
std::set_difference(self_kernel_ids_.begin(), self_kernel_ids_.end(), todo_list.begin(), todo_list.end(),
|
||||
std::inserter(diff_from_todo, diff_from_todo.begin()));
|
||||
std::set_difference(diff_from_todo.begin(), diff_from_todo.end(), doing_list.begin(), doing_list.end(),
|
||||
std::inserter(diff_from_doing, diff_from_doing.begin()));
|
||||
std::set_difference(diff_from_doing.begin(), diff_from_doing.end(), done_list.begin(), done_list.end(),
|
||||
std::inserter(diff_from_done, diff_from_done.begin()));
|
||||
|
||||
auto new_kernel_size = diff_from_done.size();
|
||||
if (new_kernel_size + todo_list.size() > static_cast<size_t>(kMaxKernelNum_)) {
|
||||
MS_LOG(ERROR) << "The size of kernels is " << new_kernel_size << ", while the left space of the pool is "
|
||||
<< kMaxKernelNum_ - todo_list.size();
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::copy(diff_from_done.begin(), diff_from_done.end(), LIST_END(kToDoIdx_));
|
||||
INCREASE_LIST_SIZE(kToDoIdx_, new_kernel_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
|
||||
LockMng lock(fd_);
|
||||
if (!lock.locked_) {
|
||||
MS_LOG(ERROR) << "Failed to acquire lock.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::set<size_t> left_in_todo_list;
|
||||
|
||||
// filter out kernels which belongs to other processes
|
||||
auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) {
|
||||
if (this->self_kernel_ids_.count(id) != 0) {
|
||||
out->emplace(id);
|
||||
} else {
|
||||
left_in_todo_list.emplace(id);
|
||||
}
|
||||
};
|
||||
|
||||
std::for_each(LIST_BEGIN(kToDoIdx_), LIST_END(kToDoIdx_), FilterBySelfList);
|
||||
|
||||
std::copy(out->begin(), out->end(), LIST_END(kDoingIdx_));
|
||||
INCREASE_LIST_SIZE(kDoingIdx_, out->size());
|
||||
|
||||
std::copy(left_in_todo_list.begin(), left_in_todo_list.end(), LIST_BEGIN(kToDoIdx_));
|
||||
RESET_LIST_SIZE(kToDoIdx_, left_in_todo_list.size());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
|
||||
if (!ids.empty()) {
|
||||
LockMng lock(fd_);
|
||||
if (!lock.locked_) {
|
||||
MS_LOG(ERROR) << "Failed to acquire lock.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
// update the state of finished kernels to `done`
|
||||
std::copy(ids.begin(), ids.end(), LIST_END(kDoneIdx_));
|
||||
INCREASE_LIST_SIZE(kDoneIdx_, ids.size());
|
||||
|
||||
// delete the finished kernels from doing_list
|
||||
std::vector<size_t> left_in_doing_list;
|
||||
INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
|
||||
std::set_difference(doing_list.begin(), doing_list.end(), ids.begin(), ids.end(),
|
||||
std::inserter(left_in_doing_list, left_in_doing_list.begin()));
|
||||
|
||||
std::copy(left_in_doing_list.begin(), left_in_doing_list.end(), LIST_BEGIN(kDoingIdx_));
|
||||
RESET_LIST_SIZE(kDoingIdx_, left_in_doing_list.size());
|
||||
}
|
||||
|
||||
auto ret = Wait();
|
||||
if (ret != 0) {
|
||||
MS_LOG(ERROR) << "AkgKernelPool Wait failed.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t AkgKernelPool::Wait() {
|
||||
// wait until all the kernels which belong to this process finish compiling
|
||||
uint32_t trials = 1000;
|
||||
|
||||
while (trials > 0) {
|
||||
{
|
||||
LockMng lock(fd_);
|
||||
if (!lock.locked_) {
|
||||
MS_LOG(ERROR) << "Failed to acquire lock.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
|
||||
|
||||
if (std::all_of(self_kernel_ids_.begin(), self_kernel_ids_.end(),
|
||||
[&done_list](size_t id) { return done_list.count(id) != 0; })) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
usleep(1000000);
|
||||
trials--;
|
||||
}
|
||||
|
||||
MS_LOG(ERROR) << "Time out while wait kernel compiling";
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args) {
|
||||
// Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess.
|
||||
std::vector<std::string> jsons;
|
||||
|
@ -66,6 +397,31 @@ std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::ve
|
|||
return jsons;
|
||||
}
|
||||
|
||||
std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vector<JsonNodePair> &build_args) {
|
||||
std::unordered_set<std::string> kernel_name_set;
|
||||
std::vector<JsonNodePair> new_build_args;
|
||||
for (const auto &[json_generator, anf_node] : build_args) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_name = json_generator.kernel_name();
|
||||
|
||||
auto cached_kernel_pack = AkgSearchCache(kernel_name);
|
||||
if (cached_kernel_pack != nullptr) {
|
||||
MS_LOG(DEBUG) << "Use cached kernel, kernel_name[" << kernel_name << "], fullname_with_scope["
|
||||
<< anf_node->fullname_with_scope() << "].";
|
||||
AkgSetKernelMod(cached_kernel_pack, json_generator, anf_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (kernel_name_set.count(kernel_name) != 0) {
|
||||
repeat_nodes_.push_back({json_generator, anf_node});
|
||||
continue;
|
||||
}
|
||||
kernel_name_set.insert(kernel_name);
|
||||
new_build_args.push_back({json_generator, anf_node});
|
||||
}
|
||||
return new_build_args;
|
||||
}
|
||||
|
||||
bool AkgKernelBuilder::InsertToCache(const std::vector<JsonNodePair> &build_args) {
|
||||
for (const auto &[json_generator, anf_node] : build_args) {
|
||||
auto kernel_name = json_generator.kernel_name();
|
||||
|
@ -97,32 +453,77 @@ bool AkgKernelBuilder::HandleRepeatNodes() {
|
|||
return true;
|
||||
}
|
||||
|
||||
std::vector<std::string> AkgKernelBuilder::GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
|
||||
std::set<size_t> fetched_ids) {
|
||||
std::vector<std::string> jsons;
|
||||
for (const auto &[json_generator, anf_node] : build_args) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_name = json_generator.kernel_name();
|
||||
|
||||
auto hash_id = NameToHashID(kernel_name);
|
||||
|
||||
if (fetched_ids.count(hash_id) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto kernel_json = json_generator.kernel_json_str();
|
||||
AkgSaveJsonInfo(kernel_name, kernel_json);
|
||||
jsons.push_back(kernel_json);
|
||||
}
|
||||
return jsons;
|
||||
}
|
||||
|
||||
bool AkgKernelBuilder::AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args) {
|
||||
repeat_nodes_.clear();
|
||||
auto jsons = GetNotCachedKernelJsons(build_args);
|
||||
if (jsons.empty()) {
|
||||
auto new_build_args = GetNotCachedKernels(build_args);
|
||||
if (new_build_args.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto client = GetClient();
|
||||
MS_EXCEPTION_IF_NULL(client);
|
||||
if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
|
||||
MS_LOG(ERROR) << "Akg start failed.";
|
||||
AkgKernelPool kp;
|
||||
auto ret = kp.Init(new_build_args);
|
||||
if (ret != 0) {
|
||||
MS_LOG(ERROR) << "AkgKernelPool init failed.";
|
||||
return false;
|
||||
}
|
||||
auto attrs = CollectBuildAttrs();
|
||||
if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
|
||||
MS_LOG(ERROR) << "Akg send attr failed.";
|
||||
|
||||
std::set<size_t> fetched_ids;
|
||||
ret = kp.FetchKernels(&fetched_ids);
|
||||
if (ret != 0) {
|
||||
MS_LOG(ERROR) << "AkgKernelPool FetchKernels failed.";
|
||||
return false;
|
||||
}
|
||||
if (!client->AkgSendData(jsons)) {
|
||||
MS_LOG(ERROR) << "Akg send data failed.";
|
||||
return false;
|
||||
}
|
||||
if (!client->AkgWait()) {
|
||||
MS_LOG(ERROR) << "Akg compile failed.";
|
||||
|
||||
if (!fetched_ids.empty()) {
|
||||
auto jsons = GetKernelJsonsByHashId(new_build_args, fetched_ids);
|
||||
|
||||
auto client = GetClient();
|
||||
MS_EXCEPTION_IF_NULL(client);
|
||||
if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
|
||||
MS_LOG(ERROR) << "Akg start failed.";
|
||||
return false;
|
||||
}
|
||||
auto attrs = CollectBuildAttrs();
|
||||
if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
|
||||
MS_LOG(ERROR) << "Akg send attr failed.";
|
||||
return false;
|
||||
}
|
||||
if (!client->AkgSendData(jsons)) {
|
||||
MS_LOG(ERROR) << "Akg send data failed.";
|
||||
return false;
|
||||
}
|
||||
if (!client->AkgWait()) {
|
||||
MS_LOG(ERROR) << "Akg compile failed.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ret = kp.UpdateAndWait(fetched_ids);
|
||||
if (ret != 0) {
|
||||
MS_LOG(ERROR) << "AkgKernelPool UpdateAndWait failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// All unique done here, cache them and set kernel.
|
||||
if (!InsertToCache(build_args)) {
|
||||
MS_LOG(ERROR) << "Insert cache failed.";
|
||||
|
|
|
@ -17,10 +17,13 @@
|
|||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
|
||||
|
||||
#include <sys/shm.h>
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "ir/anf.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/session/kernel_build_client.h"
|
||||
|
@ -45,12 +48,84 @@ class AkgKernelBuilder {
|
|||
|
||||
private:
|
||||
std::vector<std::string> GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args);
|
||||
std::vector<JsonNodePair> GetNotCachedKernels(const std::vector<JsonNodePair> &build_args);
|
||||
std::vector<std::string> GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
|
||||
std::set<size_t> fetched_ids);
|
||||
bool InsertToCache(const std::vector<JsonNodePair> &build_args);
|
||||
bool HandleRepeatNodes();
|
||||
bool AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args);
|
||||
std::vector<JsonNodePair> repeat_nodes_;
|
||||
std::string CollectBuildAttrs();
|
||||
};
|
||||
|
||||
class AkgKernelPool {
|
||||
public:
|
||||
class LockMng {
|
||||
public:
|
||||
explicit LockMng(int32_t fd) {
|
||||
fd_ = fd;
|
||||
locked_ = TryLock();
|
||||
}
|
||||
|
||||
virtual ~LockMng() {
|
||||
if (locked_) {
|
||||
Unlock();
|
||||
}
|
||||
}
|
||||
|
||||
bool locked_{false};
|
||||
|
||||
private:
|
||||
bool TryLock();
|
||||
void Unlock();
|
||||
|
||||
int32_t fd_{-1};
|
||||
};
|
||||
|
||||
public:
|
||||
AkgKernelPool() = default;
|
||||
virtual ~AkgKernelPool();
|
||||
|
||||
int32_t Init(const std::vector<JsonNodePair> &build_args);
|
||||
int32_t FetchKernels(std::set<size_t> *out);
|
||||
int32_t UpdateAndWait(const std::set<size_t> &ids);
|
||||
|
||||
constexpr inline static size_t kMaxKernelNum_{1000};
|
||||
constexpr inline static key_t kSharedMemKey_{0x57565845};
|
||||
|
||||
// allocate memory for todo_list, doing_list, done_list
|
||||
constexpr inline static size_t kListNum_{3};
|
||||
|
||||
constexpr inline static auto kKeyName_ = "./akg_build_tmp.key";
|
||||
|
||||
constexpr inline static int32_t kToDoIdx_ = 0;
|
||||
constexpr inline static int32_t kDoingIdx_ = 1;
|
||||
constexpr inline static int32_t kDoneIdx_ = 2;
|
||||
|
||||
private:
|
||||
void *CreateSharedMem(const std::string &path);
|
||||
std::string GetCurrentPath();
|
||||
|
||||
inline void InitKernelLists(void *addr) {
|
||||
kernel_lists_[kToDoIdx_] = reinterpret_cast<size_t *>(addr);
|
||||
kernel_lists_[kDoingIdx_] = kernel_lists_[kToDoIdx_] + kMaxKernelNum_ + 1;
|
||||
kernel_lists_[kDoneIdx_] = kernel_lists_[kDoingIdx_] + kMaxKernelNum_ + 1;
|
||||
}
|
||||
|
||||
int32_t AddKernels(const std::vector<JsonNodePair> &kernel_jsons);
|
||||
int32_t Wait();
|
||||
|
||||
int32_t shm_id_{-1};
|
||||
bool is_creator_{false};
|
||||
int32_t fd_{-1};
|
||||
|
||||
// includes 3 lists: todo_list, doing_list, done_list.
|
||||
// each list has kMaxKernelNum_ + 1 elements and, the count of elements in each list
|
||||
// is stored in kernel_lists_[xx][kMaxKernelNum_]
|
||||
size_t *kernel_lists_[kListNum_]{nullptr, nullptr, nullptr};
|
||||
|
||||
std::set<size_t> self_kernel_ids_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -141,14 +141,8 @@ FusionType GetFusionTypeByName(const std::string &name) {
|
|||
return iter->first;
|
||||
}
|
||||
|
||||
void KernelMeta::Initialize(int pid) {
|
||||
if (pid == -1) {
|
||||
kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/";
|
||||
} else {
|
||||
kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(pid) + "/";
|
||||
}
|
||||
// remove old kernel cache
|
||||
RemoveKernelCache();
|
||||
void KernelMeta::Initialize() {
|
||||
kernel_meta_path_ = std::string(kGpuKernelMeta) + "/";
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
auto ret = mkdir(kernel_meta_path_.c_str());
|
||||
|
@ -161,21 +155,6 @@ void KernelMeta::Initialize(int pid) {
|
|||
initialized_ = true;
|
||||
}
|
||||
|
||||
// Best-effort removal of the kernel meta directory: calls remove() on every entry readdir()
// reports under kernel_meta_path_ and then rmdir() on the directory itself. Nothing happens when
// the directory cannot be opened; the results of the individual removals are ignored.
void KernelMeta::RemoveKernelCache() {
  DIR *meta_dir = opendir(kernel_meta_path_.c_str());
  if (meta_dir == nullptr) {
    return;
  }

  for (struct dirent *item = readdir(meta_dir); item != nullptr; item = readdir(meta_dir)) {
    // kernel_meta_path_ is concatenated directly with the entry name.
    const std::string target = kernel_meta_path_ + std::string(item->d_name);
    (void)remove(target.c_str());
  }

  (void)closedir(meta_dir);
  (void)rmdir(kernel_meta_path_.c_str());
}
|
||||
|
||||
std::string KernelMeta::Search(const std::string &kernel_name) const {
|
||||
if (!initialized_) {
|
||||
return "";
|
||||
|
@ -227,7 +206,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro
|
|||
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
|
||||
// just a tmp solution.
|
||||
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
|
||||
MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "].";
|
||||
MS_LOG(ERROR) << "Read cache json and bin file failed[" << kernel_json << "].";
|
||||
return nullptr;
|
||||
} else {
|
||||
return kernel_pack;
|
||||
|
@ -250,7 +229,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
|
|||
(void)kernel_json.append(kernel_name).append(kJsonSuffix);
|
||||
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
|
||||
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
|
||||
MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "].";
|
||||
MS_LOG(ERROR) << "Read json and bin file failed[" << kernel_json << "].";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -55,8 +55,7 @@ using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
|
|||
class KernelMeta {
|
||||
public:
|
||||
KernelMeta() = default;
|
||||
void Initialize(int pid);
|
||||
void RemoveKernelCache();
|
||||
void Initialize();
|
||||
std::string Search(const std::string &kernel_name) const;
|
||||
bool Insert(const std::string &kernel_name, const std::string &kernel_json);
|
||||
std::string kernel_meta_path() const { return kernel_meta_path_; }
|
||||
|
|
|
@ -41,8 +41,7 @@ void CreateGPUKernel(const std::vector<CNodePtr> &kernels) {
|
|||
|
||||
if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) {
|
||||
if (!bin_map->initialized()) {
|
||||
auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid();
|
||||
bin_map->Initialize(pid);
|
||||
bin_map->Initialize();
|
||||
}
|
||||
if (!already_check_nvcc) {
|
||||
already_check_nvcc = true;
|
||||
|
|
|
@ -272,14 +272,6 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
|
|||
if (mem_manager_ != nullptr) {
|
||||
mem_manager_->FreeDeviceMemory();
|
||||
}
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
|
||||
kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(bin_map);
|
||||
bin_map->RemoveKernelCache();
|
||||
}
|
||||
}
|
||||
|
||||
void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs,
|
||||
|
|
|
@ -152,15 +152,6 @@ void GPUDeviceContext::Destroy() {
|
|||
mem_manager_->FreeDeviceMemory();
|
||||
mem_manager_ = nullptr;
|
||||
}
|
||||
|
||||
// Clean GPU cache kernels which is generated by AKG
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
|
||||
kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(bin_map);
|
||||
bin_map->RemoveKernelCache();
|
||||
}
|
||||
}
|
||||
|
||||
bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {
|
||||
|
|
Loading…
Reference in New Issue