!30134 Fix bug in debugging compile script

Merge pull request !30134 from jiaorui/debug-compile
This commit is contained in:
i-robot 2022-02-18 09:27:52 +00:00 committed by Gitee
commit b31c800d23
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 121 additions and 88 deletions

View File

@ -13,7 +13,7 @@
"op_bank_path": "",
"op_bank_update": false,
"op_debug_dir": "./rank_0/",
"op_debug_level": "0",
"op_debug_level": "3",
"op_impl_mode": "",
"op_impl_mode_list": [],
"socVersion": "Ascend910A",

View File

@ -20,110 +20,143 @@ from mindspore._extends.parallel_compile.tbe_compiler.tbe_job_manager import Tbe
MAX_COMPILE_SECONDS = 400  # give one compile job at most 400 s before declaring timeout
QUERY_INTERVAL = 10  # seconds between successive Query jobs while the compile is RUNNING


class TestCompile:
    """Drive a full TBE compile round-trip through ``TbeJobManager``.

    Protocol exercised: Initialize -> Compile (polled with Query jobs while
    the job stays RUNNING) -> Finalize.  Every step serializes a job dict to
    JSON, hands it to ``TbeJobManager.job_handler`` and parses the JSON reply.

    NOTE(review): this block was reconstructed from a diff-mangled source
    (old and new revisions interleaved, indentation lost); it follows the
    added lines of the new revision.  Confirm exact log wording against the
    upstream file before relying on it.
    """

    def __init__(self):
        # TbeJobManager comes from
        # mindspore._extends.parallel_compile.tbe_compiler.tbe_job_manager
        # (import visible in the hunk header above this block).
        self.tbe_compiler = TbeJobManager()

    def initialize(self):
        """Send the Initialize job read from ``Initialize.info``.

        Returns:
            bool: True on success, False if the job manager reports FAILED.
        """
        with open("Initialize.info", 'r') as init_json_file:
            init_job_json = json.load(init_json_file)
        res = self.tbe_compiler.job_handler(json.dumps(init_job_json))
        print("Initialize result:" + res)
        res_json = json.loads(res)
        for item in res_json["process_info"]:
            print("### LogLevel:" + str(item["level"]) + " " + item["message"])
        if res_json["status"] == "FAILED":
            print("Initialize Failed")
            return False
        print("\n################# Initialize Success #################\n")
        return True

    @staticmethod
    def process_finish_job(compile_result_json):
        """Report a compile job that finished without entering RUNNING.

        Args:
            compile_result_json (dict): parsed reply of the Compile job.

        Returns:
            bool: True unless the reply status is FAILED.
        """
        print("Final Compile Result:{}".format(json.dumps(compile_result_json["result"])))
        print("Process Logs:")
        for item in compile_result_json["process_info"]:
            print("### LogLevel:" + str(item["level"]) + " " + item["message"])
        if compile_result_json["status"] == "FAILED":
            print("Compile Failed")
            return False
        print("\n################# Compile Success #################\n")
        return True

    def get_query_json(self, compile_result_json):
        """Build the Query job payload for the compile job being polled.

        Uses ``self.job_id + 1`` as the Query job's own id but does NOT
        advance ``self.job_id`` itself — ``process_running_job`` bumps the
        ids on each retry.
        """
        job_id = self.job_id + 1
        query_job_json = {"source_id": self.source_id,
                          "job_id": job_id,
                          "job_type": "Query",
                          "job_content": {
                              "source_id": self.source_id,
                              "job_id": compile_result_json["job_id"]}
                          }
        return query_job_json

    def process_running_job(self, compile_result_json):
        """Poll a RUNNING compile job with Query jobs until it finishes.

        Returns:
            bool: True on compile success; False on failure, on a failed
            Query, or after MAX_COMPILE_SECONDS / QUERY_INTERVAL polls.
        """
        print("Query Running job with max compile seconds {}".format(MAX_COMPILE_SECONDS))
        query_job_json = self.get_query_json(compile_result_json)
        repeat_time = 0
        while True:
            print("Dispatch a Query Job")
            res = self.tbe_compiler.job_handler(json.dumps(query_job_json))
            res_json = json.loads(res)
            print("Query result:{}".format(res))
            if res_json["status"] != "SUCCESS":
                # The Query job itself failed, so the compile can no longer
                # be tracked — give up.
                print("Final Compile Failed:{}".format(res))
                print("Process Logs:")
                for item in res_json["process_info"]:
                    print("### LogLevel:" + str(item["level"]) + " " + item["message"])
                print("\n################# Compile Failed #################\n")
                return False
            print("Target Job info :{}".format(res_json["result"]))
            target_job = json.loads(res_json["result"])
            if target_job["status"] == "RUNNING":
                # Still compiling: bump both ids for the next Query and back off.
                self.job_id = self.job_id + 1
                query_job_json["job_id"] = query_job_json["job_id"] + 1
                for item in res_json["process_info"]:
                    print("### LogLevel:" + str(item["level"]) + " " + item["message"])
                repeat_time = repeat_time + 1
                if repeat_time > MAX_COMPILE_SECONDS / QUERY_INTERVAL:
                    print("Query TimeOut")
                    print("\n################# Compile Failed #################\n")
                    return False
                print("Sleep {} seconds".format(QUERY_INTERVAL))
                time.sleep(QUERY_INTERVAL)
            else:
                # Terminal state reached: dump both the Query logs and the
                # target job's own logs, then report success or failure.
                print("\n $$$Final Compile Result:{}\n".format(json.dumps(target_job["result"])))
                print("Process Logs:")
                for item in res_json["process_info"]:
                    print("### LogLevel:" + str(item["level"]) + " " + item["message"])
                print("Target Job Process Logs:")
                for item in target_job["process_info"]:
                    print("### LogLevel:" + str(item["level"]) + " " + item["message"])
                if target_job["status"] == "SUCCESS":
                    print("\n################# Compile Success #################\n")
                    return True
                print("\n################# Compile Failed #################\n")
                return False

    def compile(self):
        """Dispatch the compile job read from ``op.info``.

        Records the reply's source_id/job_id on self, then delegates to
        ``process_finish_job`` (already terminal) or ``process_running_job``
        (needs polling).  Note: the method name shadows the ``compile``
        builtin only inside the class namespace, which is harmless here.
        """
        with open("op.info", "r") as op_json_file:
            op_json = json.load(op_json_file)
        res = self.tbe_compiler.job_handler(json.dumps(op_json))
        print("Compile result:" + res)
        compile_result_json = json.loads(res)
        self.source_id = compile_result_json["source_id"]
        self.job_id = compile_result_json["job_id"]
        if compile_result_json["status"] != "RUNNING":
            return TestCompile.process_finish_job(compile_result_json)
        return self.process_running_job(compile_result_json)

    def finilize(self):
        """Send the Finalize job (debug level 3, dump dir ./rank_0).

        NOTE(review): method name "finilize" (sic) kept exactly as in the
        source diff so external callers keep working.

        Returns:
            bool: True on success, False if the reply status is FAILED.
        """
        job_id = self.job_id + 1
        finalize_job_json = {"source_id": self.source_id,
                             "job_id": job_id,
                             "job_type": "Finalize",
                             "job_content":
                                 {"SocInfo":
                                      {"op_debug_level": "3",
                                       "op_debug_dir": "./rank_0"
                                       }
                                  }
                             }
        res = self.tbe_compiler.job_handler(json.dumps(finalize_job_json))
        print("Finalize result:{}".format(res))
        res_json = json.loads(res)
        if res_json["status"] == "FAILED":
            print("\n################# Finalize Failed #################\n")
            return False
        print("\n################# Finalize Success #################\n")
        return True

    def test_parallel_compilation(self):
        """Run the full Initialize -> Compile -> Finalize sequence.

        Returns True only if every stage succeeds; stops at the first failure.
        """
        if not self.initialize():
            return False
        if not self.compile():
            return False
        return self.finilize()


if __name__ == "__main__":
    TestCompile().test_parallel_compilation()