diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py index 03afeb37ca9..ce609d06147 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py @@ -152,7 +152,6 @@ class TbeJob: result["job_type"] = self.type.value result["fusion_op_name"] = self.fusion_op_name result["result"] = self.result - self.debug("Resp result:{}".format(json.dumps(result))) process_info = [] for info in self.process_info: msg = {"index": info.index, "level": info.level.value, "message": info.info} diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py index d6bf8984cd4..e2e6e7895a8 100644 --- a/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py @@ -105,7 +105,6 @@ class TbeJobManager: fusion_op_name = "NA" if "fusion_op_name" not in job_json["job_content"] else job_json["job_content"][ "fusion_op_name"] job = TbeJob(source_id, job_id, job_type, job_json["job_content"], fusion_op_name, job_str, sys_info) - job.debug("Req job string: {}".format(job_str)) post_job(self._all_jobs, job) if not self.tbe_initialize and job.type != JobType.INITIALIZE_JOB: job.error( @@ -264,9 +263,6 @@ class TbeJobManager: return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS) target_job = get_job(self._running_jobs, target_source_id, target_job_id) if target_job: - query_job.debug("Found job in Running jobs, source_id:{}, job_id:{}".format(target_source_id, - target_job_id)) - target_job.debug("Be Queried") query_job.result = target_job.get_result() return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS) target_job = get_job(self._all_jobs, target_source_id, target_job_id) diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/ascend_kernel_compile.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/ascend_kernel_compile.cc index ee41144ce90..c3a1227c871 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/ascend_kernel_compile.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/ascend_kernel_compile.cc @@ -91,8 +91,8 @@ constexpr auto kTBE_IMPL_PATH = "MS_TBE_IMPL_PATH"; constexpr auto kTUNE_OPS_NAME = "MS_TUNE_OPS_NAME"; constexpr auto kDefPath = "/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/"; constexpr auto kBkPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/"; -constexpr int sleep_time = 2; -constexpr int tune_sleep_time = 10; +constexpr int KSleepSeconds = 3; +constexpr int KSleepInterval = 1000; constexpr int kFusionLogLevel = 1; namespace { @@ -351,6 +351,7 @@ void AscendKernelCompileManager::ParseTargetJobStatus(const std::string &type, c void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) { MS_EXCEPTION_IF_NULL(build_manager_); + size_t query_cnt = 0; while (!job_list_.empty()) { std::vector success_job; auto iter = job_list_.begin(); @@ -359,6 +360,7 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) { auto kernel_json = iter->second; JsonAssemble(kQuery, kernel_json, &query_json); auto build_result = build_manager_->ProcessTbeJob(query_json); + query_cnt++; ParseTargetJobStatus(job_type, build_result, &success_job); iter++; } @@ -367,8 +369,10 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) { } success_job.clear(); if (!job_list_.empty()) { - int s_time = is_tune_flag_ ? tune_sleep_time : sleep_time; - sleep(s_time); + if (query_cnt % KSleepInterval == 0) { + MS_LOG(INFO) << "Querying Parallel Compilation Job, Current Query Count: " << query_cnt; + sleep(KSleepSeconds); + } } } } @@ -377,6 +381,7 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_ MS_EXCEPTION_IF_NULL(build_manager_); MS_EXCEPTION_IF_NULL(kernel_mode_ret); int build_failed_nums = 0; + size_t query_cnt = 0; while (!job_list_.empty()) { std::vector success_job; auto iter = job_list_.begin(); @@ -385,6 +390,7 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_ auto kernel_json = iter->second; JsonAssemble(kQuery, kernel_json, &query_json); auto build_result = build_manager_->ProcessTbeJob(query_json); + query_cnt++; auto json_obj = TurnStrToJson(build_result); if (json_obj.at(kStatus) == kSuccess) { struct TargetJobStatus task_info; @@ -416,8 +422,10 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_ } success_job.clear(); if (!job_list_.empty()) { - int s_time = is_tune_flag_ ? tune_sleep_time : sleep_time; - sleep(s_time); + if (query_cnt % KSleepInterval == 0) { + MS_LOG(INFO) << "Querying Parallel Compilation Job, Current Query Count: " << query_cnt; + sleep(KSleepSeconds); + } } } MS_LOG(INFO) << "Compile Fusion Kernel Failed Num: " << build_failed_nums; diff --git a/tests/st/ops/ascend/test_tbe_ops/Initialize.info b/tests/st/ops/ascend/test_tbe_ops/Initialize.info index 3656b2caa1b..2e0c6330dbd 100644 --- a/tests/st/ops/ascend/test_tbe_ops/Initialize.info +++ b/tests/st/ops/ascend/test_tbe_ops/Initialize.info @@ -12,7 +12,7 @@ "offlineTune": false, "op_bank_path": "", "op_bank_update": false, - "op_debug_dir": "./", + "op_debug_dir": "./rank_0/", "op_debug_level": "0", "op_impl_mode": "", "op_impl_mode_list": [],