From b661db16eba74d229792114f00e0970b52de3796 Mon Sep 17 00:00:00 2001 From: lby Date: Thu, 28 Oct 2021 21:14:15 +0800 Subject: [PATCH] fix save io size bug when using auto tune --- .../kernel_compiler/tbe/tbe_kernel_compile.cc | 41 ++++++++++--------- .../kernel_compiler/tbe/tbe_kernel_compile.h | 6 +-- .../backend/kernel_compiler/tbe/tbe_utils.cc | 29 +++++++++---- .../backend/kernel_compiler/tbe/tbe_utils.h | 2 + 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.cc index 36c671e8826..8a4546b088f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.cc @@ -412,7 +412,7 @@ void TbeKernelCompileManager::SaveSucceedTaskCompileResult(int task_id, const st if (job_type == kPreCompile) { SavePreBuildResult(task_id, compile_info); return; - } else if (job_type == kCompile) { + } else { auto task_info = task_map_[task_id]; auto json_name = task_info.json_name; TbeUtils::UpdateCache(json_name); @@ -429,15 +429,11 @@ void TbeKernelCompileManager::SaveSucceedTaskCompileResult(int task_id, const st MS_LOG(EXCEPTION) << "Save json file failed, op: " << json_name << ", compile_info:" << compile_info; } } - } else if (job_type == kFusionCompile) { - auto task_info = task_map_[task_id]; - auto json_name = task_info.json_name; - TbeUtils::UpdateCache(json_name); } } void TbeKernelCompileManager::SaveIOSizeInfo(const nlohmann::json &json, const std::string &json_name, - const std::string &job_type, const std::vector &output_nodes) { + const std::vector &output_nodes) { MS_LOG(DEBUG) << "Save io size info for " << json_name; if (kernel_io_size_info_.find(json_name) != kernel_io_size_info_.end()) { MS_LOG(DEBUG) << "Io size info of " << json_name << " already exist, skip."; @@ -446,7 +442,7 @@ void TbeKernelCompileManager::SaveIOSizeInfo(const nlohmann::json &json, const s struct KernelIOSizeInfo info; std::vector input_size_list; std::vector output_size_list; - if (job_type == kFusionCompile) { + if (!output_nodes.empty()) { (void)TbeKernelBuild::GetIOSize(json[kOpList], output_nodes, &input_size_list, &output_size_list); } else { (void)TbeKernelBuild::GetIOSize(json, &input_size_list, &output_size_list); @@ -457,16 +453,17 @@ void TbeKernelCompileManager::SaveIOSizeInfo(const nlohmann::json &json, const s kernel_io_size_info_[json_name] = info; } -void TbeKernelCompileManager::SaveTaskInfo(const std::string &job_type, const bool is_dynamic, - const nlohmann::json &json, const std::string &json_name, - const std::string &full_name, int task_id, int64_t scope_id) { +void TbeKernelCompileManager::SaveTaskInfo(const bool is_dynamic, const nlohmann::json &json, + const std::string &json_name, const std::string &full_name, int task_id, + int64_t scope_id) { struct TaskInfo task_info; task_info.json_name = json_name; task_info.full_name = full_name; task_info.build_json = json; task_info.task_id = task_id; task_info.is_dynamic = is_dynamic; - if (job_type == kFusionCompile) { + // only fusion save scope_id + if (scope_id != INT64_MAX) { task_info.scope_id = scope_id; } task_map_[task_id] = task_info; @@ -501,9 +498,8 @@ void TbeKernelCompileManager::QueryProcess(const std::string &type, const std::s MS_LOG(EXCEPTION) << "Single op compile failed, op: " << kernel_name << "\n except_msg : " << target_status.except_msg << "\n node trace: " << trace::DumpSourceLines(target_node); - } else if (type == kFusionCompile) { - MS_LOG(INFO) << "Fusion op compile failed, op: " << kernel_name - << "\n except_msg : " << target_status.except_msg; + } else { + MS_LOG(INFO) << "Op " << kernel_name << " " << type << " failed,\n except_msg : " << target_status.except_msg; (void)success_job->emplace_back(target_status.target_job_id); } } @@ -550,8 +546,13 @@ void TbeKernelCompileManager::GenKernelMod(const std::vector &node_lis auto json_name = full_name_to_json_name_[full_name]; auto kernel_pack = tbe::TbeUtils::SearchCache(json_name, false); if (kernel_pack == nullptr) { - MS_LOG(EXCEPTION) << "Can not find .json file or the .o file for op:" << json_name - << ", trace: " << trace::DumpSourceLines(node); + auto *bin_map = tbe::KernelMeta::GetInstance(); + MS_EXCEPTION_IF_NULL(bin_map); + kernel_pack = bin_map->SearchInFile(json_name); + if (kernel_pack == nullptr) { + MS_LOG(EXCEPTION) << "Can not find .json file or the .o file for op:" << json_name + << ", trace: " << trace::DumpSourceLines(node); + } } auto kernel_info_json = kernel_pack->kernel_json_info(); auto kernel_mod_ptr = std::make_shared(kernel_pack); @@ -657,7 +658,7 @@ void TbeKernelCompileManager::DistributeCompileTask(const std::vector auto json_name = json_creator->GetJsonName(); full_name_to_json_name_[full_name] = json_name; // save all task io size info for gen kernel mod - SaveIOSizeInfo(kernel_json, json_name, job_type); + SaveIOSizeInfo(kernel_json, json_name); if (job_type == kCompile || job_type == kTune) { if (single_processed_kernels_.find(json_name) != single_processed_kernels_.end()) { continue; // same op no need build @@ -676,7 +677,7 @@ void TbeKernelCompileManager::DistributeCompileTask(const std::vector } auto task_id = GetJsonValue(build_json, kJobId); auto is_dynamic = AnfAlgo::IsDynamicShape(node); - SaveTaskInfo(job_type, is_dynamic, build_json, json_name, full_name, task_id, 0); + SaveTaskInfo(is_dynamic, build_json, json_name, full_name, task_id, INT64_MAX); // save pair for exception print and get node trace (void)job_id_to_node_.insert(std::pair(task_id, node)); @@ -735,7 +736,7 @@ JsonNameMap TbeKernelCompileManager::TbeFusionOpCompile(const std::vectorGetJsonName(); full_name_to_json_name_[full_name] = json_name; all_fusion_ops_[fusion_scope_iter.scope_id] = full_name; - SaveIOSizeInfo(fusion_op, json_name, job_type, fusion_scope_iter.output_nodes); + SaveIOSizeInfo(fusion_op, json_name, fusion_scope_iter.output_nodes); if (fusion_processed_kernels_.find(json_name) != fusion_processed_kernels_.end()) { continue; // same op no need build } @@ -752,7 +753,7 @@ JsonNameMap TbeKernelCompileManager::TbeFusionOpCompile(const std::vector(json_obj, kJobId); - SaveTaskInfo(job_type, false, build_json, json_name, full_name, task_id, fusion_scope_iter.scope_id); + SaveTaskInfo(false, build_json, json_name, full_name, task_id, fusion_scope_iter.scope_id); } Query(job_type); // only fusion op compile succeed can be return diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.h index b2484e5978d..f01082120fe 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_compile.h @@ -89,8 +89,8 @@ class TbeKernelCompileManager { // tbe kernel build client interface std::string DispatchCompileTask(const nlohmann::json &kernel_json); // save all build task: pre-build, single-build, fusion-build - void SaveTaskInfo(const std::string &job_type, const bool is_dynamic, const nlohmann::json &json, - const std::string &json_name, const std::string &full_name, int task_id, int64_t scope_id); + void SaveTaskInfo(const bool is_dynamic, const nlohmann::json &json, const std::string &json_name, + const std::string &full_name, int task_id, int64_t scope_id); // after job build, save some result info // for 'pre-build', save op-pattern and output_data_desc; // for 'dynamic', save compile_res @@ -109,7 +109,7 @@ class TbeKernelCompileManager { std::string ParseSelectAndCheckResult(const nlohmann::json &json, const CNodePtr &node); void ParseTargetJobStatus(const nlohmann::json &json, TargetJobStatus *task_info); nlohmann::json TurnStrToJson(const std::string &str) const; - void SaveIOSizeInfo(const nlohmann::json &json, const std::string &json_name, const std::string &job_type, + void SaveIOSizeInfo(const nlohmann::json &json, const std::string &json_name, const std::vector &output_nodes = {}); void ClearOldTask(); void LoadPreBuildResult(const std::vector &nodes); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc index 07152136eb3..db139a3ea9d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.cc @@ -490,6 +490,25 @@ std::string TbeUtils::GetSocVersion() { return version; } +KernelPackPtr KernelMeta::LoadFromFile(const std::string &kernel_name) { + auto config_path = TbeUtils::GetOpDebugPath(); + std::string cce_json = config_path + kCceKernelMeta + kernel_name + kJsonSuffix; + auto ret = std::make_shared(); + if (!ret->LoadKernelMeta(cce_json)) { + MS_LOG(INFO) << "Read cache json and bin file failed[" << cce_json << "]"; + return nullptr; + } + return ret; +} + +KernelPackPtr KernelMeta::SearchInFile(const std::string &kernel_name) { + auto ret = LoadFromFile(kernel_name); + if (ret != nullptr) { + kernel_pack_map_[kernel_name] = ret; + } + return ret; +} + void KernelMeta::UpdateCache(const std::string &kernel_name) { auto kernel_pack_iter = kernel_pack_map_.find(kernel_name); if (kernel_pack_iter != kernel_pack_map_.end()) { @@ -497,14 +516,10 @@ void KernelMeta::UpdateCache(const std::string &kernel_name) { MS_LOG(DEBUG) << "Kernel pack already exist, skip. Kernel name:" << kernel_name; return; } - auto config_path = TbeUtils::GetOpDebugPath(); - std::string cce_json = config_path + kCceKernelMeta + kernel_name + kJsonSuffix; - auto ret = std::make_shared(); - if (!ret->LoadKernelMeta(cce_json)) { - MS_LOG(INFO) << "Read cache json and bin file failed[" << cce_json << "]"; - return; + auto ret = LoadFromFile(kernel_name); + if (ret != nullptr) { + kernel_pack_map_[kernel_name] = ret; } - kernel_pack_map_[kernel_name] = ret; } KernelPackPtr KernelMeta::GetKernelPack(const std::string &kernel_name, const bool is_akg) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h index 51266ea627c..be2a53a0294 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_utils.h @@ -97,10 +97,12 @@ class KernelMeta { bool ReadIndex(const std::string &bin_dir); KernelPackPtr GetKernelPack(const std::string &kernel_name, const bool is_akg = false); void UpdateCache(const std::string &kernel_name); + KernelPackPtr SearchInFile(const std::string &kernel_name); private: KernelMeta() = default; ~KernelMeta() = default; + KernelPackPtr LoadFromFile(const std::string &kernel_name); std::unordered_map kernel_index_map_{}; std::unordered_map kernel_pack_map_{}; };