forked from mindspore-Ecosystem/mindspore
add logs when querying job
This commit is contained in:
parent
2151b927ba
commit
c6f5a02237
|
@ -152,7 +152,6 @@ class TbeJob:
|
||||||
result["job_type"] = self.type.value
|
result["job_type"] = self.type.value
|
||||||
result["fusion_op_name"] = self.fusion_op_name
|
result["fusion_op_name"] = self.fusion_op_name
|
||||||
result["result"] = self.result
|
result["result"] = self.result
|
||||||
self.debug("Resp result:{}".format(json.dumps(result)))
|
|
||||||
process_info = []
|
process_info = []
|
||||||
for info in self.process_info:
|
for info in self.process_info:
|
||||||
msg = {"index": info.index, "level": info.level.value, "message": info.info}
|
msg = {"index": info.index, "level": info.level.value, "message": info.info}
|
||||||
|
|
|
@ -105,7 +105,6 @@ class TbeJobManager:
|
||||||
fusion_op_name = "NA" if "fusion_op_name" not in job_json["job_content"] else job_json["job_content"][
|
fusion_op_name = "NA" if "fusion_op_name" not in job_json["job_content"] else job_json["job_content"][
|
||||||
"fusion_op_name"]
|
"fusion_op_name"]
|
||||||
job = TbeJob(source_id, job_id, job_type, job_json["job_content"], fusion_op_name, job_str, sys_info)
|
job = TbeJob(source_id, job_id, job_type, job_json["job_content"], fusion_op_name, job_str, sys_info)
|
||||||
job.debug("Req job string: {}".format(job_str))
|
|
||||||
post_job(self._all_jobs, job)
|
post_job(self._all_jobs, job)
|
||||||
if not self.tbe_initialize and job.type != JobType.INITIALIZE_JOB:
|
if not self.tbe_initialize and job.type != JobType.INITIALIZE_JOB:
|
||||||
job.error(
|
job.error(
|
||||||
|
@ -264,9 +263,6 @@ class TbeJobManager:
|
||||||
return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
|
return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
|
||||||
target_job = get_job(self._running_jobs, target_source_id, target_job_id)
|
target_job = get_job(self._running_jobs, target_source_id, target_job_id)
|
||||||
if target_job:
|
if target_job:
|
||||||
query_job.debug("Found job in Running jobs, source_id:{}, job_id:{}".format(target_source_id,
|
|
||||||
target_job_id))
|
|
||||||
target_job.debug("Be Queried")
|
|
||||||
query_job.result = target_job.get_result()
|
query_job.result = target_job.get_result()
|
||||||
return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
|
return self.add_to_finished_jobs(query_job, JobStatus.JOB_SUCCESS)
|
||||||
target_job = get_job(self._all_jobs, target_source_id, target_job_id)
|
target_job = get_job(self._all_jobs, target_source_id, target_job_id)
|
||||||
|
|
|
@ -91,8 +91,8 @@ constexpr auto kTBE_IMPL_PATH = "MS_TBE_IMPL_PATH";
|
||||||
constexpr auto kTUNE_OPS_NAME = "MS_TUNE_OPS_NAME";
|
constexpr auto kTUNE_OPS_NAME = "MS_TUNE_OPS_NAME";
|
||||||
constexpr auto kDefPath = "/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/";
|
constexpr auto kDefPath = "/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/";
|
||||||
constexpr auto kBkPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/";
|
constexpr auto kBkPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/";
|
||||||
constexpr int sleep_time = 2;
|
constexpr int KSleepSeconds = 3;
|
||||||
constexpr int tune_sleep_time = 10;
|
constexpr int KSleepInterval = 1000;
|
||||||
constexpr int kFusionLogLevel = 1;
|
constexpr int kFusionLogLevel = 1;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -351,6 +351,7 @@ void AscendKernelCompileManager::ParseTargetJobStatus(const std::string &type, c
|
||||||
|
|
||||||
void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
|
void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
|
||||||
MS_EXCEPTION_IF_NULL(build_manager_);
|
MS_EXCEPTION_IF_NULL(build_manager_);
|
||||||
|
size_t query_cnt = 0;
|
||||||
while (!job_list_.empty()) {
|
while (!job_list_.empty()) {
|
||||||
std::vector<int> success_job;
|
std::vector<int> success_job;
|
||||||
auto iter = job_list_.begin();
|
auto iter = job_list_.begin();
|
||||||
|
@ -359,6 +360,7 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
|
||||||
auto kernel_json = iter->second;
|
auto kernel_json = iter->second;
|
||||||
JsonAssemble(kQuery, kernel_json, &query_json);
|
JsonAssemble(kQuery, kernel_json, &query_json);
|
||||||
auto build_result = build_manager_->ProcessTbeJob(query_json);
|
auto build_result = build_manager_->ProcessTbeJob(query_json);
|
||||||
|
query_cnt++;
|
||||||
ParseTargetJobStatus(job_type, build_result, &success_job);
|
ParseTargetJobStatus(job_type, build_result, &success_job);
|
||||||
iter++;
|
iter++;
|
||||||
}
|
}
|
||||||
|
@ -367,8 +369,10 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
|
||||||
}
|
}
|
||||||
success_job.clear();
|
success_job.clear();
|
||||||
if (!job_list_.empty()) {
|
if (!job_list_.empty()) {
|
||||||
int s_time = is_tune_flag_ ? tune_sleep_time : sleep_time;
|
if (query_cnt % KSleepInterval == 0) {
|
||||||
sleep(s_time);
|
MS_LOG(INFO) << "Querying Parallel Compilation Job, Current Query Count: " << query_cnt;
|
||||||
|
sleep(KSleepSeconds);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -377,6 +381,7 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
|
||||||
MS_EXCEPTION_IF_NULL(build_manager_);
|
MS_EXCEPTION_IF_NULL(build_manager_);
|
||||||
MS_EXCEPTION_IF_NULL(kernel_mode_ret);
|
MS_EXCEPTION_IF_NULL(kernel_mode_ret);
|
||||||
int build_failed_nums = 0;
|
int build_failed_nums = 0;
|
||||||
|
size_t query_cnt = 0;
|
||||||
while (!job_list_.empty()) {
|
while (!job_list_.empty()) {
|
||||||
std::vector<int> success_job;
|
std::vector<int> success_job;
|
||||||
auto iter = job_list_.begin();
|
auto iter = job_list_.begin();
|
||||||
|
@ -385,6 +390,7 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
|
||||||
auto kernel_json = iter->second;
|
auto kernel_json = iter->second;
|
||||||
JsonAssemble(kQuery, kernel_json, &query_json);
|
JsonAssemble(kQuery, kernel_json, &query_json);
|
||||||
auto build_result = build_manager_->ProcessTbeJob(query_json);
|
auto build_result = build_manager_->ProcessTbeJob(query_json);
|
||||||
|
query_cnt++;
|
||||||
auto json_obj = TurnStrToJson(build_result);
|
auto json_obj = TurnStrToJson(build_result);
|
||||||
if (json_obj.at(kStatus) == kSuccess) {
|
if (json_obj.at(kStatus) == kSuccess) {
|
||||||
struct TargetJobStatus task_info;
|
struct TargetJobStatus task_info;
|
||||||
|
@ -416,8 +422,10 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
|
||||||
}
|
}
|
||||||
success_job.clear();
|
success_job.clear();
|
||||||
if (!job_list_.empty()) {
|
if (!job_list_.empty()) {
|
||||||
int s_time = is_tune_flag_ ? tune_sleep_time : sleep_time;
|
if (query_cnt % KSleepInterval == 0) {
|
||||||
sleep(s_time);
|
MS_LOG(INFO) << "Querying Parallel Compilation Job, Current Query Count: " << query_cnt;
|
||||||
|
sleep(KSleepSeconds);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MS_LOG(INFO) << "Compile Fusion Kernel Failed Num: " << build_failed_nums;
|
MS_LOG(INFO) << "Compile Fusion Kernel Failed Num: " << build_failed_nums;
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
"offlineTune": false,
|
"offlineTune": false,
|
||||||
"op_bank_path": "",
|
"op_bank_path": "",
|
||||||
"op_bank_update": false,
|
"op_bank_update": false,
|
||||||
"op_debug_dir": "./",
|
"op_debug_dir": "./rank_0/",
|
||||||
"op_debug_level": "0",
|
"op_debug_level": "0",
|
||||||
"op_impl_mode": "",
|
"op_impl_mode": "",
|
||||||
"op_impl_mode_list": [],
|
"op_impl_mode_list": [],
|
||||||
|
|
Loading…
Reference in New Issue