Use a kernel pool to share the compilation results when running on multiple

cards
This commit is contained in:
ckey_Dou 2021-07-27 11:13:29 +08:00
parent 899b89feba
commit d293c5eb26
9 changed files with 498 additions and 67 deletions

2
akg

@ -1 +1 @@
Subproject commit 4aac4d95750a87e664f175c0fa946a069f8a0c2a
Subproject commit 15b59fb739944c1903558659a39b34bb632de448

View File

@ -50,11 +50,6 @@ def _compile_akg_task_gpu(json_strs, attrs):
if not res:
raise ValueError("Compile error, args: {}! build attrs: {}".format(json_str, attrs))
pid_path = os.path.realpath("./cuda_meta_" + str(os.getpid()))
if os.path.exists(pid_path):
copy_json(pid_path, os.path.realpath("./cuda_meta_" + str(os.getppid())))
shutil.rmtree(pid_path)
def _compile_akg_task_ascend(json_strs, attrs):
"""

View File

@ -16,6 +16,11 @@
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <map>
#include <memory>
@ -23,6 +28,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include <iostream>
#include "nlohmann/json.hpp"
#include "ir/dtype.h"
#include "ir/func_graph.h"
@ -34,9 +40,334 @@
namespace mindspore {
namespace kernel {
// Helpers for the kernel lists addressed through kernel_lists_[list_idx].
// Each list keeps its current element count in slot [kMaxKernelNum_], so the
// valid ids of a list are the range [LIST_BEGIN(list_idx), LIST_END(list_idx)).
// All of these macros expand to expressions on the member `kernel_lists_`, so
// they can only be used inside AkgKernelPool member functions.

// Declares a local std::set<size_t> named `set_var` holding a copy of the ids
// currently stored in list `list_idx`. NOTE: the expansion already ends with ';'.
#define INIT_SET_FROM_2D_ARRAY(set_var, list_idx) \
std::set<size_t> set_var(kernel_lists_[list_idx], kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]);
// Pointer to the first id slot of list `list_idx`.
#define LIST_BEGIN(list_idx) kernel_lists_[list_idx]
// Pointer one past the last stored id of list `list_idx` (begin + stored count).
#define LIST_END(list_idx) (kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_])
// Overwrites the stored element count of list `list_idx` with `val`.
#define RESET_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] = val
// Adds `val` to the stored element count of list `list_idx`.
#define INCREASE_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] += val
// Arguments passed to the build client's AkgStart(PROCESS_NUM, TIME_OUT).
// NOTE(review): presumably the number of compile worker processes and a timeout
// in seconds -- confirm against the client implementation.
constexpr int32_t PROCESS_NUM = 16;
constexpr int32_t TIME_OUT = 300;
// Converts the decimal suffix of a kernel name into a numeric hash id.
// The suffix is everything after the last '_' (the whole name if there is no
// '_'). An empty suffix yields 0. Any non-digit character in the suffix makes
// std::stoi throw std::invalid_argument.
static inline size_t NameToHashID(const std::string &name) {
  const auto suffix = name.substr(name.find_last_of("_") + 1);
  size_t result = 0;
  size_t weight = 1;
  // Accumulate digits from the least significant (rightmost) one upwards.
  for (auto digit = suffix.rbegin(); digit != suffix.rend(); ++digit) {
    result += static_cast<size_t>(std::stoi(std::string(1, *digit))) * weight;
    weight *= 10;
  }
  return result;
}
// Attempts to take the file lock on fd_ without blocking.
// Makes up to 100 attempts, sleeping 5000 microseconds after each attempt that
// fails with EACCES or EAGAIN. Any other errno aborts the retry loop at once.
// Returns true when the lock was acquired, false (after logging) otherwise.
bool AkgKernelPool::LockMng::TryLock() {
  int32_t ret = -1;
  for (uint32_t remaining = 100; remaining > 0; --remaining) {
    ret = lockf(fd_, F_TLOCK, 0);
    if (ret == 0) {
      break;
    }
    // Only "lock is held by someone else" is worth retrying.
    if (errno != EACCES && errno != EAGAIN) {
      break;
    }
    usleep(5000);
  }

  if (ret != -1) {
    return true;
  }
  MS_LOG(ERROR) << "Failed to acquire the lock, errno:" << strerror(errno) << ".";
  return false;
}
// Releases the file lock held on fd_; a failure is logged and otherwise ignored.
void AkgKernelPool::LockMng::Unlock() {
  if (lockf(fd_, F_ULOCK, 0) != -1) {
    return;
  }
  MS_LOG(ERROR) << "Failed to release the lock, errno:" << strerror(errno);
}
// Returns the canonical (realpath-resolved) current working directory.
// On failure of getcwd or realpath the error is logged and an empty string is
// returned.
std::string AkgKernelPool::GetCurrentPath() {
  char work_dir[PATH_MAX];
  if (getcwd(work_dir, sizeof(work_dir)) == nullptr) {
    MS_LOG(ERROR) << "Get current work directory failed, errno:" << strerror(errno);
    return "";
  }

  char resolved[PATH_MAX];
  if (realpath(work_dir, resolved) == nullptr) {
    MS_LOG(ERROR) << "Change to realpath failed, errno:" << strerror(errno);
    return "";
  }

  return std::string(resolved);
}
// Creates, or attaches to, the System V shared memory segment that backs the
// kernel lists and returns its local address (nullptr on failure).
//
// The segment key is std::hash of `path` converted to key_t, so processes that
// pass the same path get the same key. Sets shm_id_ and is_creator_; the
// process that actually creates the segment zero-fills it.
void *AkgKernelPool::CreateSharedMem(const std::string &path) {
is_creator_ = false;
auto hash_id = std::hash<std::string>()(path);
auto key_id = static_cast<key_t>(hash_id);
// kListNum_ lists of (kMaxKernelNum_ ids + 1 count slot) each, plus 512 extra bytes.
// NOTE(review): the purpose of the extra 512 bytes is not visible here -- confirm.
auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512;
// Step 1 (own lock scope, released at the closing brace): drop a stale segment.
{
LockMng lock(fd_);
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return nullptr;
}
// check if the shared memory exists or not.
// remove shared memory if exists and the nattach is 0
struct shmid_ds buf;
// Flags == 0: only look up an existing segment, never create one here.
auto id = shmget(key_id, mem_size, 0);
if (id != -1) {
auto ret = shmctl(id, IPC_STAT, &buf);
if (ret == -1) {
MS_LOG(ERROR) << "Failed to get the info of shared memory, errno:" << strerror(errno);
return nullptr;
}
// No process is attached, so the segment is treated as a leftover and removed.
if (buf.shm_nattch == 0) {
ret = shmctl(id, IPC_RMID, nullptr);
if (ret < 0) {
MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno);
}
}
}
}
// Step 2: re-acquire the lock; it is held until this function returns.
LockMng lock(fd_);
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return nullptr;
}
// Try to create the segment exclusively; success means this process is the creator.
shm_id_ = shmget(key_id, mem_size, IPC_CREAT | IPC_EXCL | 0600);
if (shm_id_ == -1) {
// Another process created it first: fall back to opening the existing segment.
if (errno == EEXIST) {
shm_id_ = shmget(key_id, mem_size, 0);
}
if (shm_id_ == -1) {
MS_LOG(ERROR) << "Create shared_mem failed, error no:" << strerror(errno);
return nullptr;
}
} else {
is_creator_ = true;
}
auto local_addr = shmat(shm_id_, nullptr, 0);
// shmat signals failure with (void *)-1 rather than nullptr.
if (local_addr == reinterpret_cast<void *>(-1)) {
MS_LOG(ERROR) << "Attach to shared_mem failed, error no:" << strerror(errno);
return nullptr;
}
// Only the creator clears the segment, so all list counts start at 0.
if (is_creator_) {
(void)memset(local_addr, 0, mem_size);
}
return local_addr;
}
// Prepares the pool for this process: opens the lock file, creates or attaches
// to the shared memory segment keyed by the current working directory, maps
// the kernel lists onto it and registers the kernels in `build_args`.
//
// Returns 0 on success and -1 on any failure (the cause is logged).
int32_t AkgKernelPool::Init(const std::vector<JsonNodePair> &build_args) {
  auto cp = GetCurrentPath();
  if (cp.empty()) {
    return -1;
  }

  // The key file is only used as the target of the inter-process file lock.
  fd_ = open(kKeyName_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
  if (fd_ == -1) {
    MS_LOG(ERROR) << "open file <" << kKeyName_ << "> failed, errno:" << strerror(errno);
    return -1;
  }

  auto addr = CreateSharedMem(cp);
  if (addr == nullptr) {
    return -1;
  }
  InitKernelLists(addr);

  auto ret = AddKernels(build_args);
  if (ret != 0) {
    MS_LOG(ERROR) << "AkgKernelPool AddKernels failed.";
    // Must be a non-zero status: `false` would convert to 0 and be read by the
    // caller as success.
    return -1;
  }
  return 0;
}
// Releases everything the pool acquired: detaches the shared memory, removes
// the segment if this process created it, and closes the lock file.
//
// Failures are logged with ERROR rather than EXCEPTION: throwing from a
// destructor would terminate the process. Each resource is released
// independently so one failure does not leak the others.
AkgKernelPool::~AkgKernelPool() {
  // Detach shared memory. kernel_lists_[0] is still nullptr when Init() failed
  // before the segment was attached; there is nothing to detach in that case.
  if (kernel_lists_[0] != nullptr) {
    if (shmdt(reinterpret_cast<void *>(kernel_lists_[0])) < 0) {
      MS_LOG(ERROR) << "Shared_mem detach failed, errno:" << strerror(errno);
    }
  }

  // Release shared memory (creator only, and only if a segment id was obtained).
  if (is_creator_ && shm_id_ != -1) {
    if (shmctl(shm_id_, IPC_RMID, nullptr) < 0) {
      MS_LOG(ERROR) << "Realse shared_mem failed, errno:" << strerror(errno);
    }
  }

  // Close key file
  if (fd_ != -1) {
    (void)close(fd_);
  }
}
// Registers the kernels of this process in the pool.
// Records the hash id of every kernel in `build_args` in self_kernel_ids_, then
// appends to the todo list those ids that are in none of the todo, doing and
// done lists. Runs under the file lock.
// Returns 0 on success; -1 if the lock cannot be taken, if two kernels map to
// the same hash id, or if the new ids do not fit into the todo list.
int32_t AkgKernelPool::AddKernels(const std::vector<JsonNodePair> &build_args) {
  LockMng lock(fd_);
  if (!lock.locked_) {
    MS_LOG(ERROR) << "Failed to acquire lock.";
    return -1;
  }

  // Snapshots of the three shared lists.
  INIT_SET_FROM_2D_ARRAY(todo_list, kToDoIdx_);
  INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
  INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);

  for (const auto &[json_generator, anf_node] : build_args) {
    MS_EXCEPTION_IF_NULL(anf_node);
    auto kernel_name = json_generator.kernel_name();
    // emplace reports whether the id was new; an existing id is a duplicate.
    if (!self_kernel_ids_.emplace(NameToHashID(kernel_name)).second) {
      MS_LOG(ERROR) << "Duplicated hash_id in list.";
      return -1;
    }
  }

  // add the unique kernel only once: keep only ids unknown to every list
  std::set<size_t> new_ids;
  for (const auto id : self_kernel_ids_) {
    const bool known = todo_list.count(id) != 0 || doing_list.count(id) != 0 || done_list.count(id) != 0;
    if (!known) {
      new_ids.emplace(id);
    }
  }

  const auto new_kernel_size = new_ids.size();
  if (new_kernel_size + todo_list.size() > static_cast<size_t>(kMaxKernelNum_)) {
    MS_LOG(ERROR) << "The size of kernels is " << new_kernel_size << ", while the left space of the pool is "
                  << kMaxKernelNum_ - todo_list.size();
    return -1;
  }

  std::copy(new_ids.begin(), new_ids.end(), LIST_END(kToDoIdx_));
  INCREASE_LIST_SIZE(kToDoIdx_, new_kernel_size);
  return 0;
}
int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
LockMng lock(fd_);
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
}
std::set<size_t> left_in_todo_list;
// filter out kernels which belongs to other processes
auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) {
if (this->self_kernel_ids_.count(id) != 0) {
out->emplace(id);
} else {
left_in_todo_list.emplace(id);
}
};
std::for_each(LIST_BEGIN(kToDoIdx_), LIST_END(kToDoIdx_), FilterBySelfList);
std::copy(out->begin(), out->end(), LIST_END(kDoingIdx_));
INCREASE_LIST_SIZE(kDoingIdx_, out->size());
std::copy(left_in_todo_list.begin(), left_in_todo_list.end(), LIST_BEGIN(kToDoIdx_));
RESET_LIST_SIZE(kToDoIdx_, left_in_todo_list.size());
return 0;
}
int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
if (!ids.empty()) {
LockMng lock(fd_);
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
}
// update the state of finished kernels to `done`
std::copy(ids.begin(), ids.end(), LIST_END(kDoneIdx_));
INCREASE_LIST_SIZE(kDoneIdx_, ids.size());
// delete the finished kernels from doing_list
std::vector<size_t> left_in_doing_list;
INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
std::set_difference(doing_list.begin(), doing_list.end(), ids.begin(), ids.end(),
std::inserter(left_in_doing_list, left_in_doing_list.begin()));
std::copy(left_in_doing_list.begin(), left_in_doing_list.end(), LIST_BEGIN(kDoingIdx_));
RESET_LIST_SIZE(kDoingIdx_, left_in_doing_list.size());
}
auto ret = Wait();
if (ret != 0) {
MS_LOG(ERROR) << "AkgKernelPool Wait failed.";
return -1;
}
return 0;
}
// Polls the done list until it contains every id in self_kernel_ids_.
// Checks up to 1000 times, sleeping 1000000 microseconds between checks. The
// file lock is held only while the done list is read, not while sleeping.
// Returns 0 once all ids are done, -1 if the lock cannot be taken or the
// attempts run out.
int32_t AkgKernelPool::Wait() {
  for (uint32_t remaining = 1000; remaining > 0; --remaining) {
    {
      LockMng lock(fd_);
      if (!lock.locked_) {
        MS_LOG(ERROR) << "Failed to acquire lock.";
        return -1;
      }
      INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
      bool all_done = true;
      for (const auto id : self_kernel_ids_) {
        if (done_list.count(id) == 0) {
          all_done = false;
          break;
        }
      }
      if (all_done) {
        return 0;
      }
    }  // lock released here, before sleeping
    usleep(1000000);
  }
  MS_LOG(ERROR) << "Time out while wait kernel compiling";
  return -1;
}
std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args) {
// Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess.
std::vector<std::string> jsons;
@ -66,6 +397,31 @@ std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::ve
return jsons;
}
std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vector<JsonNodePair> &build_args) {
std::unordered_set<std::string> kernel_name_set;
std::vector<JsonNodePair> new_build_args;
for (const auto &[json_generator, anf_node] : build_args) {
MS_EXCEPTION_IF_NULL(anf_node);
auto kernel_name = json_generator.kernel_name();
auto cached_kernel_pack = AkgSearchCache(kernel_name);
if (cached_kernel_pack != nullptr) {
MS_LOG(DEBUG) << "Use cached kernel, kernel_name[" << kernel_name << "], fullname_with_scope["
<< anf_node->fullname_with_scope() << "].";
AkgSetKernelMod(cached_kernel_pack, json_generator, anf_node);
continue;
}
if (kernel_name_set.count(kernel_name) != 0) {
repeat_nodes_.push_back({json_generator, anf_node});
continue;
}
kernel_name_set.insert(kernel_name);
new_build_args.push_back({json_generator, anf_node});
}
return new_build_args;
}
bool AkgKernelBuilder::InsertToCache(const std::vector<JsonNodePair> &build_args) {
for (const auto &[json_generator, anf_node] : build_args) {
auto kernel_name = json_generator.kernel_name();
@ -97,32 +453,77 @@ bool AkgKernelBuilder::HandleRepeatNodes() {
return true;
}
// Collects the kernel json strings of the entries in `build_args` whose hash id
// (derived from the kernel name) is contained in `fetched_ids`. Each selected
// json is also passed to AkgSaveJsonInfo together with its kernel name.
std::vector<std::string> AkgKernelBuilder::GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
                                                                  std::set<size_t> fetched_ids) {
  std::vector<std::string> selected;
  for (const auto &[json_generator, anf_node] : build_args) {
    MS_EXCEPTION_IF_NULL(anf_node);
    auto kernel_name = json_generator.kernel_name();
    if (fetched_ids.find(NameToHashID(kernel_name)) != fetched_ids.end()) {
      auto kernel_json = json_generator.kernel_json_str();
      AkgSaveJsonInfo(kernel_name, kernel_json);
      selected.push_back(kernel_json);
    }
  }
  return selected;
}
bool AkgKernelBuilder::AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args) {
repeat_nodes_.clear();
auto jsons = GetNotCachedKernelJsons(build_args);
if (jsons.empty()) {
auto new_build_args = GetNotCachedKernels(build_args);
if (new_build_args.empty()) {
return true;
}
auto client = GetClient();
MS_EXCEPTION_IF_NULL(client);
if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
MS_LOG(ERROR) << "Akg start failed.";
AkgKernelPool kp;
auto ret = kp.Init(new_build_args);
if (ret != 0) {
MS_LOG(ERROR) << "AkgKernelPool init failed.";
return false;
}
auto attrs = CollectBuildAttrs();
if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
MS_LOG(ERROR) << "Akg send attr failed.";
std::set<size_t> fetched_ids;
ret = kp.FetchKernels(&fetched_ids);
if (ret != 0) {
MS_LOG(ERROR) << "AkgKernelPool FetchKernels failed.";
return false;
}
if (!client->AkgSendData(jsons)) {
MS_LOG(ERROR) << "Akg send data failed.";
return false;
}
if (!client->AkgWait()) {
MS_LOG(ERROR) << "Akg compile failed.";
if (!fetched_ids.empty()) {
auto jsons = GetKernelJsonsByHashId(new_build_args, fetched_ids);
auto client = GetClient();
MS_EXCEPTION_IF_NULL(client);
if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
MS_LOG(ERROR) << "Akg start failed.";
return false;
}
auto attrs = CollectBuildAttrs();
if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
MS_LOG(ERROR) << "Akg send attr failed.";
return false;
}
if (!client->AkgSendData(jsons)) {
MS_LOG(ERROR) << "Akg send data failed.";
return false;
}
if (!client->AkgWait()) {
MS_LOG(ERROR) << "Akg compile failed.";
return false;
}
}
ret = kp.UpdateAndWait(fetched_ids);
if (ret != 0) {
MS_LOG(ERROR) << "AkgKernelPool UpdateAndWait failed.";
return false;
}
// All unique done here, cache them and set kernel.
if (!InsertToCache(build_args)) {
MS_LOG(ERROR) << "Insert cache failed.";

View File

@ -17,10 +17,13 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
#include <sys/shm.h>
#include <string>
#include <utility>
#include <vector>
#include <map>
#include <set>
#include "ir/anf.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/session/kernel_build_client.h"
@ -45,12 +48,84 @@ class AkgKernelBuilder {
private:
std::vector<std::string> GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args);
std::vector<JsonNodePair> GetNotCachedKernels(const std::vector<JsonNodePair> &build_args);
std::vector<std::string> GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
std::set<size_t> fetched_ids);
bool InsertToCache(const std::vector<JsonNodePair> &build_args);
bool HandleRepeatNodes();
bool AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args);
std::vector<JsonNodePair> repeat_nodes_;
std::string CollectBuildAttrs();
};
// A pool of kernel ids kept in System V shared memory so that several
// processes can split up kernel compilation and wait for each other's results.
// The pool holds three lists (todo, doing, done); access to them is serialized
// with a file lock on kKeyName_.
//
// The pool owns a file descriptor and a shared memory attachment, so it is
// not copyable.
class AkgKernelPool {
 public:
  // Scoped holder of the file lock on `fd`: tries to lock on construction and
  // unlocks on destruction if the lock was obtained. Check `locked_` after
  // construction. Not copyable, since a copy would unlock a second time.
  class LockMng {
   public:
    explicit LockMng(int32_t fd) {
      fd_ = fd;
      locked_ = TryLock();
    }

    virtual ~LockMng() {
      if (locked_) {
        Unlock();
      }
    }

    LockMng(const LockMng &) = delete;
    LockMng &operator=(const LockMng &) = delete;

    // True when the constructor managed to acquire the lock.
    bool locked_{false};

   private:
    bool TryLock();
    void Unlock();

    int32_t fd_{-1};
  };

 public:
  AkgKernelPool() = default;
  virtual ~AkgKernelPool();

  AkgKernelPool(const AkgKernelPool &) = delete;
  AkgKernelPool &operator=(const AkgKernelPool &) = delete;

  // Opens the lock file, attaches the shared lists and registers `build_args`.
  // Returns 0 on success.
  int32_t Init(const std::vector<JsonNodePair> &build_args);
  // Moves this process's kernels from the todo list to the doing list and
  // reports their ids through `out`. Returns 0 on success.
  int32_t FetchKernels(std::set<size_t> *out);
  // Marks `ids` as done, then waits until all kernels of this process are done.
  // Returns 0 on success.
  int32_t UpdateAndWait(const std::set<size_t> &ids);

  // Maximum number of ids each list can hold.
  constexpr inline static size_t kMaxKernelNum_{1000};
  constexpr inline static key_t kSharedMemKey_{0x57565845};
  // allocate memory for todo_list, doing_list, done_list
  constexpr inline static size_t kListNum_{3};

  // File used as the target of the inter-process lock.
  constexpr inline static auto kKeyName_ = "./akg_build_tmp.key";

  // Indices into kernel_lists_.
  constexpr inline static int32_t kToDoIdx_ = 0;
  constexpr inline static int32_t kDoingIdx_ = 1;
  constexpr inline static int32_t kDoneIdx_ = 2;

 private:
  void *CreateSharedMem(const std::string &path);
  std::string GetCurrentPath();

  // Lays the three lists out back to back, starting at `addr`; each list takes
  // kMaxKernelNum_ + 1 elements.
  inline void InitKernelLists(void *addr) {
    kernel_lists_[kToDoIdx_] = reinterpret_cast<size_t *>(addr);
    kernel_lists_[kDoingIdx_] = kernel_lists_[kToDoIdx_] + kMaxKernelNum_ + 1;
    kernel_lists_[kDoneIdx_] = kernel_lists_[kDoingIdx_] + kMaxKernelNum_ + 1;
  }

  int32_t AddKernels(const std::vector<JsonNodePair> &kernel_jsons);
  int32_t Wait();

  int32_t shm_id_{-1};
  bool is_creator_{false};
  int32_t fd_{-1};

  // includes 3 lists: todo_list, doing_list, done_list.
  // each list has kMaxKernelNum_ + 1 elements and, the count of elements in each list
  // is stored in kernel_lists_[xx][kMaxKernelNum_]
  size_t *kernel_lists_[kListNum_]{nullptr, nullptr, nullptr};

  // Hash ids of the kernels registered by this process.
  std::set<size_t> self_kernel_ids_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -141,14 +141,8 @@ FusionType GetFusionTypeByName(const std::string &name) {
return iter->first;
}
void KernelMeta::Initialize(int pid) {
if (pid == -1) {
kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/";
} else {
kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(pid) + "/";
}
// remove old kernel cache
RemoveKernelCache();
void KernelMeta::Initialize() {
kernel_meta_path_ = std::string(kGpuKernelMeta) + "/";
#if defined(_WIN32) || defined(_WIN64)
auto ret = mkdir(kernel_meta_path_.c_str());
@ -161,21 +155,6 @@ void KernelMeta::Initialize(int pid) {
initialized_ = true;
}
// Deletes every entry inside kernel_meta_path_ and then the directory itself.
// Does nothing if the directory cannot be opened; results of the individual
// remove/closedir/rmdir calls are ignored.
void KernelMeta::RemoveKernelCache() {
  DIR *cache_dir = opendir(kernel_meta_path_.c_str());
  if (cache_dir == nullptr) {
    return;
  }
  for (struct dirent *item = readdir(cache_dir); item != nullptr; item = readdir(cache_dir)) {
    std::string file_name = item->d_name;
    std::string file_path = kernel_meta_path_ + file_name;
    (void)remove(file_path.c_str());
  }
  (void)closedir(cache_dir);
  (void)rmdir(kernel_meta_path_.c_str());
}
std::string KernelMeta::Search(const std::string &kernel_name) const {
if (!initialized_) {
return "";
@ -227,7 +206,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
// just a tmp solution.
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "].";
MS_LOG(ERROR) << "Read cache json and bin file failed[" << kernel_json << "].";
return nullptr;
} else {
return kernel_pack;
@ -250,7 +229,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
(void)kernel_json.append(kernel_name).append(kJsonSuffix);
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "].";
MS_LOG(ERROR) << "Read json and bin file failed[" << kernel_json << "].";
return nullptr;
}

View File

@ -55,8 +55,7 @@ using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
class KernelMeta {
public:
KernelMeta() = default;
void Initialize(int pid);
void RemoveKernelCache();
void Initialize();
std::string Search(const std::string &kernel_name) const;
bool Insert(const std::string &kernel_name, const std::string &kernel_json);
std::string kernel_meta_path() const { return kernel_meta_path_; }

View File

@ -41,8 +41,7 @@ void CreateGPUKernel(const std::vector<CNodePtr> &kernels) {
if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) {
if (!bin_map->initialized()) {
auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid();
bin_map->Initialize(pid);
bin_map->Initialize();
}
if (!already_check_nvcc) {
already_check_nvcc = true;

View File

@ -272,14 +272,6 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
if (mem_manager_ != nullptr) {
mem_manager_->FreeDeviceMemory();
}
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
MS_EXCEPTION_IF_NULL(bin_map);
bin_map->RemoveKernelCache();
}
}
void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs,

View File

@ -152,15 +152,6 @@ void GPUDeviceContext::Destroy() {
mem_manager_->FreeDeviceMemory();
mem_manager_ = nullptr;
}
// Clean GPU cache kernels which is generated by AKG
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
MS_EXCEPTION_IF_NULL(bin_map);
bin_map->RemoveKernelCache();
}
}
bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {