Add logs when querying jobs

This commit is contained in:
LaiYongqiang 2021-08-14 13:08:30 +08:00
parent 2151b927ba
commit c6f5a02237
4 changed files with 15 additions and 12 deletions

View File

@ -152,7 +152,6 @@ class TbeJob:
result["job_type"] = self.type.value
result["fusion_op_name"] = self.fusion_op_name
result["result"] = self.result
self.debug("Resp result:{}".format(json.dumps(result)))
process_info = []
for info in self.process_info:
msg = {"index": info.index, "level": info.level.value, "message": info.info}

View File

@ -105,7 +105,6 @@ class TbeJobManager:
fusion_op_name = "NA" if "fusion_op_name" not in job_json["job_content"] else job_json["job_content"][
"fusion_op_name"]
job = TbeJob(source_id, job_id, job_type, job_json["job_content"], fusion_op_name, job_str, sys_info)
job.debug("Req job string: {}".format(job_str))
post_job(self._all_jobs, job)
if not self.tbe_initialize and job.type != JobType.INITIALIZE_JOB:
job.error(
@ -264,9 +263,6 @@ class TbeJobManager:
return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
target_job = get_job(self._running_jobs, target_source_id, target_job_id)
if target_job:
query_job.debug("Found job in Running jobs, source_id:{}, job_id:{}".format(target_source_id,
target_job_id))
target_job.debug("Be Queried")
query_job.result = target_job.get_result()
return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
target_job = get_job(self._all_jobs, target_source_id, target_job_id)

View File

@ -91,8 +91,8 @@ constexpr auto kTBE_IMPL_PATH = "MS_TBE_IMPL_PATH";
constexpr auto kTUNE_OPS_NAME = "MS_TUNE_OPS_NAME";
constexpr auto kDefPath = "/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/";
constexpr auto kBkPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/";
constexpr int sleep_time = 2;
constexpr int tune_sleep_time = 10;
constexpr int KSleepSeconds = 3;
constexpr int KSleepInterval = 1000;
constexpr int kFusionLogLevel = 1;
namespace {
@ -351,6 +351,7 @@ void AscendKernelCompileManager::ParseTargetJobStatus(const std::string &type, c
void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
MS_EXCEPTION_IF_NULL(build_manager_);
size_t query_cnt = 0;
while (!job_list_.empty()) {
std::vector<int> success_job;
auto iter = job_list_.begin();
@ -359,6 +360,7 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
auto kernel_json = iter->second;
JsonAssemble(kQuery, kernel_json, &query_json);
auto build_result = build_manager_->ProcessTbeJob(query_json);
query_cnt++;
ParseTargetJobStatus(job_type, build_result, &success_job);
iter++;
}
@ -367,8 +369,10 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
}
success_job.clear();
if (!job_list_.empty()) {
int s_time = is_tune_flag_ ? tune_sleep_time : sleep_time;
sleep(s_time);
if (query_cnt % KSleepInterval == 0) {
MS_LOG(INFO) << "Querying Parallel Compilation Job, Current Query Count: " << query_cnt;
sleep(KSleepSeconds);
}
}
}
}
@ -377,6 +381,7 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
MS_EXCEPTION_IF_NULL(build_manager_);
MS_EXCEPTION_IF_NULL(kernel_mode_ret);
int build_failed_nums = 0;
size_t query_cnt = 0;
while (!job_list_.empty()) {
std::vector<int> success_job;
auto iter = job_list_.begin();
@ -385,6 +390,7 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
auto kernel_json = iter->second;
JsonAssemble(kQuery, kernel_json, &query_json);
auto build_result = build_manager_->ProcessTbeJob(query_json);
query_cnt++;
auto json_obj = TurnStrToJson(build_result);
if (json_obj.at(kStatus) == kSuccess) {
struct TargetJobStatus task_info;
@ -416,8 +422,10 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
}
success_job.clear();
if (!job_list_.empty()) {
int s_time = is_tune_flag_ ? tune_sleep_time : sleep_time;
sleep(s_time);
if (query_cnt % KSleepInterval == 0) {
MS_LOG(INFO) << "Querying Parallel Compilation Job, Current Query Count: " << query_cnt;
sleep(KSleepSeconds);
}
}
}
MS_LOG(INFO) << "Compile Fusion Kernel Failed Num: " << build_failed_nums;

View File

@ -12,7 +12,7 @@
"offlineTune": false,
"op_bank_path": "",
"op_bank_update": false,
"op_debug_dir": "./",
"op_debug_dir": "./rank_0/",
"op_debug_level": "0",
"op_impl_mode": "",
"op_impl_mode_list": [],