From d293c5eb26113a8a379e91cc66c7dad90776c604 Mon Sep 17 00:00:00 2001 From: ckey_Dou Date: Tue, 27 Jul 2021 11:13:29 +0800 Subject: [PATCH] using kernel pool to share the compiling results when running on multi cards --- akg | 2 +- .../akg_compiler/akg_process.py | 5 - .../kernel_compiler/akg/akg_kernel_build.cc | 431 +++++++++++++++++- .../kernel_compiler/akg/akg_kernel_build.h | 75 +++ .../backend/kernel_compiler/common_utils.cc | 29 +- .../backend/kernel_compiler/common_utils.h | 3 +- .../runtime/device/gpu/gpu_kernel_build.cc | 3 +- .../runtime/device/gpu/gpu_kernel_runtime.cc | 8 - .../hardware/gpu/gpu_device_context.cc | 9 - 9 files changed, 498 insertions(+), 67 deletions(-) diff --git a/akg b/akg index 4aac4d95750..15b59fb7399 160000 --- a/akg +++ b/akg @@ -1 +1 @@ -Subproject commit 4aac4d95750a87e664f175c0fa946a069f8a0c2a +Subproject commit 15b59fb739944c1903558659a39b34bb632de448 diff --git a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py index c6487c9f17c..d3f0bbf1641 100644 --- a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +++ b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py @@ -50,11 +50,6 @@ def _compile_akg_task_gpu(json_strs, attrs): if not res: raise ValueError("Compile error, args: {}! build attrs: {}".format(json_str, attrs)) - pid_path = os.path.realpath("./cuda_meta_" + str(os.getpid())) - if os.path.exists(pid_path): - copy_json(pid_path, os.path.realpath("./cuda_meta_" + str(os.getppid()))) - shutil.rmtree(pid_path) - def _compile_akg_task_ascend(json_strs, attrs): """ diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc index 8b047f153a0..b6d70406d5c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc @@ -16,6 +16,11 @@ #include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include +#include +#include +#include + #include #include #include @@ -23,6 +28,7 @@ #include #include #include +#include #include "nlohmann/json.hpp" #include "ir/dtype.h" #include "ir/func_graph.h" @@ -34,9 +40,334 @@ namespace mindspore { namespace kernel { + +#define INIT_SET_FROM_2D_ARRAY(set_var, list_idx) \ + std::set set_var(kernel_lists_[list_idx], kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]); + +#define LIST_BEGIN(list_idx) kernel_lists_[list_idx] +#define LIST_END(list_idx) (kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]) +#define RESET_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] = val + +#define INCREASE_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] += val + constexpr int32_t PROCESS_NUM = 16; constexpr int32_t TIME_OUT = 300; +static inline size_t NameToHashID(const std::string &name) { + auto idx = name.find_last_of("_"); + auto hash_id_str = name.substr(idx + 1); + size_t hash_id = 0; + size_t carry = 1; + + for (int i = static_cast(hash_id_str.size() - 1); i >= 0; i--) { + hash_id += static_cast(std::stoi(hash_id_str.substr(static_cast(i), 1))) * carry; + carry *= 10; + } + + return hash_id; +} + +bool AkgKernelPool::LockMng::TryLock() { + // Try to lock 100 times. 
+  // Return false and log errno if the lock cannot be acquired.
+  uint32_t trial = 100;
+
+  int32_t ret = -1;
+  while (trial > 0) {
+    ret = lockf(fd_, F_TLOCK, 0);
+    if (ret == 0 || (errno != EACCES && errno != EAGAIN)) {
+      break;
+    }
+
+    trial--;
+    usleep(5000);
+  }
+
+  if (ret == -1) {
+    MS_LOG(ERROR) << "Failed to acquire the lock, errno:" << strerror(errno) << ".";
+    return false;
+  }
+
+  return true;
+}
+
+void AkgKernelPool::LockMng::Unlock() {
+  auto ret = lockf(fd_, F_ULOCK, 0);
+  if (ret == -1) {
+    MS_LOG(ERROR) << "Failed to release the lock, errno:" << strerror(errno);
+  }
+}
+
+std::string AkgKernelPool::GetCurrentPath() {
+  char cwd[PATH_MAX];
+  char *ret = getcwd(cwd, sizeof(cwd));
+  if (ret == nullptr) {
+    MS_LOG(ERROR) << "Get current working directory failed, errno:" << strerror(errno);
+    return "";
+  }
+
+  char abspath[PATH_MAX];
+  char *res = realpath(cwd, abspath);
+  if (res == nullptr) {
+    MS_LOG(ERROR) << "Change to realpath failed, errno:" << strerror(errno);
+    return "";
+  }
+
+  return std::string(abspath);
+}
+
+void *AkgKernelPool::CreateSharedMem(const std::string &path) {
+  is_creator_ = false;
+
+  auto hash_id = std::hash<std::string>()(path);
+  auto key_id = static_cast<key_t>(hash_id);
+  auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512;
+
+  {
+    LockMng lock(fd_);
+    if (!lock.locked_) {
+      MS_LOG(ERROR) << "Failed to acquire lock.";
+      return nullptr;
+    }
+
+    // Check whether the shared memory already exists;
+    // remove it if it exists and its attach count (shm_nattch) is 0.
+    struct shmid_ds buf;
+    auto id = shmget(key_id, mem_size, 0);
+    if (id != -1) {
+      auto ret = shmctl(id, IPC_STAT, &buf);
+      if (ret == -1) {
+        MS_LOG(ERROR) << "Failed to get the info of shared memory, errno:" << strerror(errno);
+        return nullptr;
+      }
+
+      if (buf.shm_nattch == 0) {
+        ret = shmctl(id, IPC_RMID, nullptr);
+        if (ret < 0) {
+          MS_LOG(EXCEPTION) << "Release shared_mem failed, errno:" << strerror(errno);
+        }
+      }
+    }
+  }
+
+  LockMng lock(fd_);
+  if (!lock.locked_) {
+    MS_LOG(ERROR) << "Failed to acquire lock.";
+    return nullptr;
+  }
+
+  shm_id_ = shmget(key_id, mem_size, IPC_CREAT | IPC_EXCL | 0600);
+  if (shm_id_ == -1) {
+    if (errno == EEXIST) {
+      shm_id_ = shmget(key_id, mem_size, 0);
+    }
+
+    if (shm_id_ == -1) {
+      MS_LOG(ERROR) << "Create shared_mem failed, errno:" << strerror(errno);
+      return nullptr;
+    }
+  } else {
+    is_creator_ = true;
+  }
+
+  auto local_addr = shmat(shm_id_, nullptr, 0);
+  if (local_addr == reinterpret_cast<void *>(-1)) {
+    MS_LOG(ERROR) << "Attach to shared_mem failed, errno:" << strerror(errno);
+    return nullptr;
+  }
+
+  if (is_creator_) {
+    (void)memset(local_addr, 0, mem_size);
+  }
+
+  return local_addr;
+}
+
+int32_t AkgKernelPool::Init(const std::vector<JsonNodePair> &build_args) {
+  auto cp = GetCurrentPath();
+  if (cp.empty()) {
+    return -1;
+  }
+
+  fd_ = open(kKeyName_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+  if (fd_ == -1) {
+    MS_LOG(ERROR) << "open file <" << kKeyName_ << "> failed, errno:" << strerror(errno);
+    return -1;
+  }
+
+  auto addr = CreateSharedMem(cp);
+  if (addr == nullptr) {
+    return -1;
+  }
+
+  InitKernelLists(addr);
+
+  auto ret = AddKernels(build_args);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool AddKernels failed.";
+    return -1;
+  }
+
+  return 0;
+}
+
+AkgKernelPool::~AkgKernelPool() {
+  // Detach shared memory
+  auto ret = shmdt(reinterpret_cast<void *>(kernel_lists_[0]));
+  if (ret < 0) {
+    MS_LOG(EXCEPTION) << "Shared_mem detach failed, errno:" << strerror(errno);
+  }
+
+  // Release shared memory
+  if (is_creator_) {
+    ret = shmctl(shm_id_, IPC_RMID, nullptr);
+    if (ret < 0) {
+      MS_LOG(EXCEPTION) << "Release shared_mem failed, errno:" << strerror(errno);
+    }
+  }
+
+  // Close key file
+  if (fd_ != -1) {
+    (void)close(fd_);
+  }
+}
+
+int32_t AkgKernelPool::AddKernels(const std::vector<JsonNodePair> &build_args) {
+  LockMng lock(fd_);
+  if (!lock.locked_) {
+    MS_LOG(ERROR) << "Failed to acquire lock.";
+    return -1;
+  }
+
+  INIT_SET_FROM_2D_ARRAY(todo_list, kToDoIdx_);
+  INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
+  INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
+
+  for (const auto &[json_generator, anf_node] : build_args) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto kernel_name = json_generator.kernel_name();
+
+    auto hash_id = NameToHashID(kernel_name);
+    if (self_kernel_ids_.count(hash_id) != 0) {
+      MS_LOG(ERROR) << "Duplicated hash_id in list.";
+      return -1;
+    }
+
+    self_kernel_ids_.emplace(hash_id);
+  }
+
+  std::set<size_t> diff_from_todo;
+  std::set<size_t> diff_from_doing;
+  std::set<size_t> diff_from_done;
+
+  // Each unique kernel is added only once, so check whether it already exists in
+  // todo_list, doing_list, or done_list.
+  std::set_difference(self_kernel_ids_.begin(), self_kernel_ids_.end(), todo_list.begin(), todo_list.end(),
+                      std::inserter(diff_from_todo, diff_from_todo.begin()));
+  std::set_difference(diff_from_todo.begin(), diff_from_todo.end(), doing_list.begin(), doing_list.end(),
+                      std::inserter(diff_from_doing, diff_from_doing.begin()));
+  std::set_difference(diff_from_doing.begin(), diff_from_doing.end(), done_list.begin(), done_list.end(),
+                      std::inserter(diff_from_done, diff_from_done.begin()));
+
+  auto new_kernel_size = diff_from_done.size();
+  if (new_kernel_size + todo_list.size() > static_cast<size_t>(kMaxKernelNum_)) {
+    MS_LOG(ERROR) << "The number of new kernels is " << new_kernel_size << ", while the space left in the pool is "
+                  << kMaxKernelNum_ - todo_list.size();
+    return -1;
+  }
+
+  std::copy(diff_from_done.begin(), diff_from_done.end(), LIST_END(kToDoIdx_));
+  INCREASE_LIST_SIZE(kToDoIdx_, new_kernel_size);
+
+  return 0;
+}
+
+int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
+  LockMng lock(fd_);
+  if (!lock.locked_) {
+    MS_LOG(ERROR) << "Failed to acquire lock.";
+    return -1;
+  }
+
+  std::set<size_t> left_in_todo_list;
+
+  // Filter out kernels which belong to other processes.
+  auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) {
+    if (this->self_kernel_ids_.count(id) != 0) {
+      out->emplace(id);
+    } else {
+      left_in_todo_list.emplace(id);
+    }
+  };
+
+  std::for_each(LIST_BEGIN(kToDoIdx_), LIST_END(kToDoIdx_), FilterBySelfList);
+
+  std::copy(out->begin(), out->end(), LIST_END(kDoingIdx_));
+  INCREASE_LIST_SIZE(kDoingIdx_, out->size());
+
+  std::copy(left_in_todo_list.begin(), left_in_todo_list.end(), LIST_BEGIN(kToDoIdx_));
+  RESET_LIST_SIZE(kToDoIdx_, left_in_todo_list.size());
+
+  return 0;
+}
+
+int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
+  if (!ids.empty()) {
+    LockMng lock(fd_);
+    if (!lock.locked_) {
+      MS_LOG(ERROR) << "Failed to acquire lock.";
+      return -1;
+    }
+
+    // Update the state of the finished kernels to `done`.
+    std::copy(ids.begin(), ids.end(), LIST_END(kDoneIdx_));
+    INCREASE_LIST_SIZE(kDoneIdx_, ids.size());
+
+    // Delete the finished kernels from doing_list.
+    std::vector<size_t> left_in_doing_list;
+    INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
+    std::set_difference(doing_list.begin(), doing_list.end(), ids.begin(), ids.end(),
+                        std::inserter(left_in_doing_list, left_in_doing_list.begin()));
+
+    std::copy(left_in_doing_list.begin(), left_in_doing_list.end(), LIST_BEGIN(kDoingIdx_));
+    RESET_LIST_SIZE(kDoingIdx_, left_in_doing_list.size());
+  }
+
+  auto ret = Wait();
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool Wait failed.";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AkgKernelPool::Wait() {
+  // Wait until all the kernels which belong to this process finish compiling.
+  uint32_t trials = 1000;
+
+  while (trials > 0) {
+    {
+      LockMng lock(fd_);
+      if (!lock.locked_) {
+        MS_LOG(ERROR) << "Failed to acquire lock.";
+        return -1;
+      }
+
+      INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
+
+      if (std::all_of(self_kernel_ids_.begin(), self_kernel_ids_.end(),
+                      [&done_list](size_t id) { return done_list.count(id) != 0; })) {
+        return 0;
+      }
+    }
+
+    usleep(1000000);
+    trials--;
+  }
+
+  MS_LOG(ERROR) << "Timed out while waiting for kernel compiling.";
+  return -1;
+}
+
 std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args) {
   // Remove cached nodes, gather unique nodes, and collect repeated nodes which need postprocessing.
   std::vector<std::string> jsons;
@@ -66,6 +397,31 @@ std::vector AkgKernelBuilder::GetNotCachedKernelJsons(const std::ve
   return jsons;
 }
 
+std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vector<JsonNodePair> &build_args) {
+  std::unordered_set<std::string> kernel_name_set;
+  std::vector<JsonNodePair> new_build_args;
+  for (const auto &[json_generator, anf_node] : build_args) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto kernel_name = json_generator.kernel_name();
+
+    auto cached_kernel_pack = AkgSearchCache(kernel_name);
+    if (cached_kernel_pack != nullptr) {
+      MS_LOG(DEBUG) << "Use cached kernel, kernel_name[" << kernel_name << "], fullname_with_scope["
+                    << anf_node->fullname_with_scope() << "].";
+      AkgSetKernelMod(cached_kernel_pack, json_generator, anf_node);
+      continue;
+    }
+
+    if (kernel_name_set.count(kernel_name) != 0) {
+      repeat_nodes_.push_back({json_generator, anf_node});
+      continue;
+    }
+    kernel_name_set.insert(kernel_name);
+    new_build_args.push_back({json_generator, anf_node});
+  }
+  return new_build_args;
+}
+
 bool AkgKernelBuilder::InsertToCache(const std::vector<JsonNodePair> &build_args) {
   for (const auto &[json_generator, anf_node] : build_args) {
     auto kernel_name = json_generator.kernel_name();
@@ -97,32 +453,77 @@ bool AkgKernelBuilder::HandleRepeatNodes() {
   return true;
 }
 
+std::vector<std::string> AkgKernelBuilder::GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
+                                                                  std::set<size_t> fetched_ids) {
+  std::vector<std::string> jsons;
+  for (const auto &[json_generator, anf_node] : build_args) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto kernel_name = json_generator.kernel_name();
+
+    auto hash_id = NameToHashID(kernel_name);
+
+    if (fetched_ids.count(hash_id) == 0) {
+      continue;
+    }
+
+    auto kernel_json = json_generator.kernel_json_str();
+    AkgSaveJsonInfo(kernel_name, kernel_json);
+    jsons.push_back(kernel_json);
+  }
+  return jsons;
+}
+
 bool AkgKernelBuilder::AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args) {
   repeat_nodes_.clear();
-  auto jsons = GetNotCachedKernelJsons(build_args);
-  if (jsons.empty()) {
+  auto new_build_args = GetNotCachedKernels(build_args);
+  if (new_build_args.empty()) {
     return true;
   }
-  auto client = GetClient();
-  MS_EXCEPTION_IF_NULL(client);
-  if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
-    MS_LOG(ERROR) << "Akg start failed.";
+  AkgKernelPool kp;
+  auto ret = kp.Init(new_build_args);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool init failed.";
     return false;
   }
-  auto attrs = CollectBuildAttrs();
-  if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
-    MS_LOG(ERROR) << "Akg send attr failed.";
+
+  std::set<size_t> fetched_ids;
+  ret = kp.FetchKernels(&fetched_ids);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool FetchKernels failed.";
     return false;
   }
- if (!client->AkgSendData(jsons)) { - MS_LOG(ERROR) << "Akg send data failed."; - return false; - } - if (!client->AkgWait()) { - MS_LOG(ERROR) << "Akg compile failed."; + + if (!fetched_ids.empty()) { + auto jsons = GetKernelJsonsByHashId(new_build_args, fetched_ids); + + auto client = GetClient(); + MS_EXCEPTION_IF_NULL(client); + if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) { + MS_LOG(ERROR) << "Akg start failed."; + return false; + } + auto attrs = CollectBuildAttrs(); + if (!attrs.empty() && !client->AkgSendAttr(attrs)) { + MS_LOG(ERROR) << "Akg send attr failed."; + return false; + } + if (!client->AkgSendData(jsons)) { + MS_LOG(ERROR) << "Akg send data failed."; + return false; + } + if (!client->AkgWait()) { + MS_LOG(ERROR) << "Akg compile failed."; + return false; + } + } + + ret = kp.UpdateAndWait(fetched_ids); + if (ret != 0) { + MS_LOG(ERROR) << "AkgKernelPool UpdateAndWait failed."; return false; } + // All unique done here, cache them and set kernel. if (!InsertToCache(build_args)) { MS_LOG(ERROR) << "Insert cache failed."; diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h index c0012ece6ff..9f9958f1464 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h @@ -17,10 +17,13 @@ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_ +#include + #include #include #include #include +#include #include "ir/anf.h" #include "backend/kernel_compiler/kernel.h" #include "backend/session/kernel_build_client.h" @@ -45,12 +48,84 @@ class AkgKernelBuilder { private: std::vector GetNotCachedKernelJsons(const std::vector &build_args); + std::vector GetNotCachedKernels(const std::vector &build_args); + std::vector GetKernelJsonsByHashId(const std::vector &build_args, + std::set fetched_ids); bool InsertToCache(const std::vector &build_args); bool HandleRepeatNodes(); bool AkgOpParallelBuild(const std::vector &build_args); std::vector repeat_nodes_; std::string CollectBuildAttrs(); }; + +class AkgKernelPool { + public: + class LockMng { + public: + explicit LockMng(int32_t fd) { + fd_ = fd; + locked_ = TryLock(); + } + + virtual ~LockMng() { + if (locked_) { + Unlock(); + } + } + + bool locked_{false}; + + private: + bool TryLock(); + void Unlock(); + + int32_t fd_{-1}; + }; + + public: + AkgKernelPool() = default; + virtual ~AkgKernelPool(); + + int32_t Init(const std::vector &build_args); + int32_t FetchKernels(std::set *out); + int32_t UpdateAndWait(const std::set &ids); + + constexpr inline static size_t kMaxKernelNum_{1000}; + constexpr inline static key_t kSharedMemKey_{0x57565845}; + + // allocate memory for todo_list, doing_list, done_list + constexpr inline static size_t kListNum_{3}; + + constexpr inline static auto kKeyName_ = "./akg_build_tmp.key"; + + constexpr inline static int32_t kToDoIdx_ = 0; + constexpr inline static int32_t kDoingIdx_ = 1; + constexpr inline static int32_t kDoneIdx_ = 2; + + private: + void *CreateSharedMem(const std::string &path); + std::string GetCurrentPath(); + + inline void InitKernelLists(void *addr) { + kernel_lists_[kToDoIdx_] = reinterpret_cast(addr); + kernel_lists_[kDoingIdx_] = kernel_lists_[kToDoIdx_] + kMaxKernelNum_ + 1; + kernel_lists_[kDoneIdx_] = kernel_lists_[kDoingIdx_] + kMaxKernelNum_ + 1; + } + + int32_t AddKernels(const std::vector &kernel_jsons); + int32_t Wait(); + 
+ int32_t shm_id_{-1}; + bool is_creator_{false}; + int32_t fd_{-1}; + + // includes 3 lists: todo_list, doing_list, done_list. + // each list has kMaxKernelNum_ + 1 elements and, the count of elements in each list + // is stored in kernel_lists_[xx][kMaxKernelNum_] + size_t *kernel_lists_[kListNum_]{nullptr, nullptr, nullptr}; + + std::set self_kernel_ids_; +}; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc index edc94673083..7d19cf65a0c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc @@ -141,14 +141,8 @@ FusionType GetFusionTypeByName(const std::string &name) { return iter->first; } -void KernelMeta::Initialize(int pid) { - if (pid == -1) { - kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/"; - } else { - kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(pid) + "/"; - } - // remove old kernel cache - RemoveKernelCache(); +void KernelMeta::Initialize() { + kernel_meta_path_ = std::string(kGpuKernelMeta) + "/"; #if defined(_WIN32) || defined(_WIN64) auto ret = mkdir(kernel_meta_path_.c_str()); @@ -161,21 +155,6 @@ void KernelMeta::Initialize(int pid) { initialized_ = true; } -void KernelMeta::RemoveKernelCache() { - DIR *dir = opendir(kernel_meta_path_.c_str()); - if (dir == nullptr) { - return; - } - struct dirent *entry; - while ((entry = readdir(dir)) != nullptr) { - std::string kernel_file = entry->d_name; - std::string kernel_file_realpath = kernel_meta_path_ + kernel_file; - (void)remove(kernel_file_realpath.c_str()); - } - (void)closedir(dir); - (void)rmdir(kernel_meta_path_.c_str()); -} - std::string KernelMeta::Search(const std::string &kernel_name) const { if (!initialized_) { return ""; @@ -227,7 +206,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro KernelPackPtr kernel_pack = std::make_shared(); // just a tmp solution. 
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { - MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "]."; + MS_LOG(ERROR) << "Read cache json and bin file failed[" << kernel_json << "]."; return nullptr; } else { return kernel_pack; @@ -250,7 +229,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro (void)kernel_json.append(kernel_name).append(kJsonSuffix); KernelPackPtr kernel_pack = std::make_shared(); if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { - MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "]."; + MS_LOG(ERROR) << "Read json and bin file failed[" << kernel_json << "]."; return nullptr; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h index 9c50ea0213f..507517954bd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h @@ -55,8 +55,7 @@ using KernelMetaPtr = std::shared_ptr; class KernelMeta { public: KernelMeta() = default; - void Initialize(int pid); - void RemoveKernelCache(); + void Initialize(); std::string Search(const std::string &kernel_name) const; bool Insert(const std::string &kernel_name, const std::string &kernel_json); std::string kernel_meta_path() const { return kernel_meta_path_; } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc index 5be77aef128..6bb925b043b 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc @@ -41,8 +41,7 @@ void CreateGPUKernel(const std::vector &kernels) { if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) { if (!bin_map->initialized()) { - auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid(); - bin_map->Initialize(pid); + bin_map->Initialize(); } if (!already_check_nvcc) { already_check_nvcc = true; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index e56bdcfa5ad..f483c796075 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -272,14 +272,6 @@ void GPUKernelRuntime::ReleaseDeviceRes() { if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } - - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (!(context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG))) { - kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); - MS_EXCEPTION_IF_NULL(bin_map); - bin_map->RemoveKernelCache(); - } } void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs, diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc index 4264cdf6d81..fa92a5aac3f 100644 --- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc @@ -152,15 +152,6 @@ void GPUDeviceContext::Destroy() { mem_manager_->FreeDeviceMemory(); mem_manager_ = nullptr; } - - // Clean GPU cache kernels which is generated by AKG - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (!(context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG))) { - kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); - 
MS_EXCEPTION_IF_NULL(bin_map); - bin_map->RemoveKernelCache(); - } } bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {
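
Note: the new AkgKernelPool coordinates multiple builder processes (one per card) through a lockf()-guarded key file plus a System V shared-memory block holding three fixed-size lists (todo / doing / done), where slot [kMaxKernelNum_] of each list stores that list's element count. The standalone sketch below is not MindSpore code: the key value, file name, list size and kernel ids are illustrative, and it uses a blocking lockf(F_LOCK) instead of the pool's retrying F_TLOCK; it only shows the publish / claim / mark-done layout in isolation.

// Standalone sketch of the shared todo/doing/done layout used by AkgKernelPool.
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

constexpr size_t kMaxNum = 1000;  // capacity of each list
constexpr size_t kListNum = 3;    // todo, doing, done

int main() {
  // The key file doubles as the lock object shared by all builder processes.
  int fd = open("./akg_build_demo.key", O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
  if (fd == -1) { perror("open"); return 1; }
  if (lockf(fd, F_LOCK, 0) == -1) { perror("lockf"); return 1; }

  // One shared block: kListNum lists, each with kMaxNum entries plus a trailing count slot.
  size_t mem_size = sizeof(size_t) * kListNum * (kMaxNum + 1);
  int shm_id = shmget(0x5745, mem_size, IPC_CREAT | 0600);
  if (shm_id == -1) { perror("shmget"); return 1; }
  size_t *base = static_cast<size_t *>(shmat(shm_id, nullptr, 0));
  if (base == reinterpret_cast<size_t *>(-1)) { perror("shmat"); return 1; }

  size_t *todo = base;                      // list 0
  size_t *done = base + 2 * (kMaxNum + 1);  // list 2 ("doing" is skipped in this demo)

  // Publish two pending kernel ids, as AddKernels does with the name hash ids.
  todo[todo[kMaxNum]] = 101;
  todo[kMaxNum] += 1;
  todo[todo[kMaxNum]] = 202;
  todo[kMaxNum] += 1;

  // Claim every pending id and mark it done, as FetchKernels + UpdateAndWait do
  // around the actual compile step.
  for (size_t i = 0; i < todo[kMaxNum]; ++i) {
    done[done[kMaxNum]] = todo[i];
    done[kMaxNum] += 1;
  }
  todo[kMaxNum] = 0;

  printf("kernels marked done: %zu\n", done[kMaxNum]);

  (void)lockf(fd, F_ULOCK, 0);
  (void)shmdt(base);
  (void)shmctl(shm_id, IPC_RMID, nullptr);  // demo cleanup; the real pool removes it only as creator
  (void)close(fd);
  return 0;
}

In the patch itself, AkgOpParallelBuild drives this as Init() -> FetchKernels() -> compile -> UpdateAndWait(); the creator process removes the segment in its destructor, and stale segments whose attach count has dropped to zero are cleaned up on the next Init().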