forked from mindspore-Ecosystem/mindspore
!30134 Fix bug of Debugging Compile Script
Merge pull request !30134 from jiaorui/debug-compile
This commit is contained in:
commit
b31c800d23
|
@ -13,7 +13,7 @@
|
|||
"op_bank_path": "",
|
||||
"op_bank_update": false,
|
||||
"op_debug_dir": "./rank_0/",
|
||||
"op_debug_level": "0",
|
||||
"op_debug_level": "3",
|
||||
"op_impl_mode": "",
|
||||
"op_impl_mode_list": [],
|
||||
"socVersion": "Ascend910A",
|
||||
|
|
|
@ -20,110 +20,143 @@ from mindspore._extends.parallel_compile.tbe_compiler.tbe_job_manager import Tbe
|
|||
MAX_COMPILE_SECONDS = 400
|
||||
QUERY_INTERVAL = 10
|
||||
|
||||
class TestCompile:
|
||||
|
||||
def test_parallel_compilation(compile_job_json_str):
|
||||
with open("Initialize.info", 'r') as init_json_file:
|
||||
# Initialize
|
||||
init_job_json = json.load(init_json_file)
|
||||
tbe_compiler = TbeJobManager()
|
||||
res = tbe_compiler.job_handler(json.dumps(init_job_json))
|
||||
print("Initialize result:" + res)
|
||||
res_json = json.loads(res)
|
||||
for item in res_json["process_info"]:
|
||||
def __init__(self):
|
||||
self.tbe_compiler = TbeJobManager()
|
||||
|
||||
def initialize(self):
|
||||
with open("Initialize.info", 'r') as init_json_file:
|
||||
init_job_json = json.load(init_json_file)
|
||||
res = self.tbe_compiler.job_handler(json.dumps(init_job_json))
|
||||
print("Initialize result:" + res)
|
||||
res_json = json.loads(res)
|
||||
for item in res_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
|
||||
if res_json["status"] == "FAILED":
|
||||
print("Initialize Failed")
|
||||
return False
|
||||
|
||||
print("\n################# Initialize Success #################\n")
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def process_finish_job(compile_result_json):
|
||||
print("Final Compile Result:{}".format(json.dumps(compile_result_json["result"])))
|
||||
print("Process Logs:")
|
||||
for item in compile_result_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
res_json = json.loads(res)
|
||||
if res_json["status"] == "FAILED":
|
||||
print("Initialize Failed")
|
||||
print("Compile Failed")
|
||||
return False
|
||||
|
||||
print("\n################# Initialize Success #################\n")
|
||||
# Dispatch Compile Job
|
||||
res = tbe_compiler.job_handler(compile_job_json_str)
|
||||
print("Compile result:" + res)
|
||||
compile_result_json = json.loads(res)
|
||||
source_id = compile_result_json["source_id"]
|
||||
job_id = compile_result_json["job_id"]
|
||||
if compile_result_json["status"] != "RUNNING":
|
||||
# Process Finish Job
|
||||
print("Final Compile Result:{}".format(json.dumps(compile_result_json["result"])))
|
||||
print("Process Logs:")
|
||||
for item in compile_result_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
print("\n################# Compile Success #################\n")
|
||||
return True
|
||||
|
||||
def get_query_json(self, compile_result_json):
|
||||
job_id = self.job_id + 1
|
||||
query_job_json = {"source_id": self.source_id,
|
||||
"job_id": job_id,
|
||||
"job_type": "Query",
|
||||
"job_content": {
|
||||
"source_id": self.source_id,
|
||||
"job_id": compile_result_json["job_id"]}
|
||||
}
|
||||
return query_job_json
|
||||
|
||||
def process_running_job(self, compile_result_json):
|
||||
print("Query Running job with max compile seconds {}".format(MAX_COMPILE_SECONDS))
|
||||
query_job_json = self.get_query_json(compile_result_json)
|
||||
repeat_time = 0
|
||||
while True:
|
||||
print("Dispatch a Query Job")
|
||||
res = self.tbe_compiler.job_handler(json.dumps(query_job_json))
|
||||
res_json = json.loads(res)
|
||||
if res_json["status"] == "FAILED":
|
||||
print("Compile Failed")
|
||||
return False
|
||||
else:
|
||||
# Process Running Job
|
||||
print("Query Running job with max compile seconds {}".format(MAX_COMPILE_SECONDS))
|
||||
job_id = job_id + 1
|
||||
query_job_json = dict()
|
||||
query_job_json["source_id"] = source_id
|
||||
query_job_json["job_id"] = job_id
|
||||
query_job_json["job_type"] = "Query"
|
||||
target_job = dict()
|
||||
target_job["source_id"] = source_id
|
||||
target_job["job_id"] = compile_result_json["job_id"]
|
||||
query_job_json["job_content"] = target_job
|
||||
repeat_time = 0
|
||||
while True:
|
||||
print("Dispatch a Query Job")
|
||||
res = tbe_compiler.job_handler(json.dumps(query_job_json))
|
||||
res_json = json.loads(res)
|
||||
print("Query result:{}".format(res))
|
||||
if res_json["status"] == "SUCCESS":
|
||||
print("Target Job info :{}".format(res_json["result"]))
|
||||
target_job = json.loads(res_json["result"])
|
||||
if target_job["status"] == "RUNNING":
|
||||
job_id = job_id + 1
|
||||
query_job_json["job_id"] = query_job_json["job_id"] + 1
|
||||
for item in res_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
repeat_time = repeat_time + 1
|
||||
if repeat_time > MAX_COMPILE_SECONDS / QUERY_INTERVAL:
|
||||
print("Query TimeOut")
|
||||
print("\n################# Compile Failed #################\n")
|
||||
break
|
||||
print("Sleep {} seconds".format(QUERY_INTERVAL))
|
||||
time.sleep(QUERY_INTERVAL)
|
||||
else:
|
||||
print("\n $$$Final Compile Result:{}\n".format(json.dumps(target_job["result"])))
|
||||
print("Process Logs:")
|
||||
for item in res_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
print("Target Job Process Logs:")
|
||||
for item in target_job["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
if target_job["status"] == "SUCCESS":
|
||||
print("\n################# Compile Success #################\n")
|
||||
else:
|
||||
print("\n################# Compile Failed #################\n")
|
||||
break
|
||||
print("Query result:{}".format(res))
|
||||
if res_json["status"] == "SUCCESS":
|
||||
print("Target Job info :{}".format(res_json["result"]))
|
||||
target_job = json.loads(res_json["result"])
|
||||
if target_job["status"] == "RUNNING":
|
||||
self.job_id = self.job_id + 1
|
||||
query_job_json["job_id"] = query_job_json["job_id"] + 1
|
||||
for item in res_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
repeat_time = repeat_time + 1
|
||||
if repeat_time > MAX_COMPILE_SECONDS / QUERY_INTERVAL:
|
||||
print("Query TimeOut")
|
||||
print("\n################# Compile Failed #################\n")
|
||||
return False
|
||||
print("Sleep {} seconds".format(QUERY_INTERVAL))
|
||||
time.sleep(QUERY_INTERVAL)
|
||||
else:
|
||||
print("Final Compile Failed:{}".format(res))
|
||||
print("\n $$$Final Compile Result:{}\n".format(json.dumps(target_job["result"])))
|
||||
print("Process Logs:")
|
||||
for item in res_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
print("\n################# Compile Failed #################\n")
|
||||
break
|
||||
print("Target Job Process Logs:")
|
||||
for item in target_job["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
if target_job["status"] == "SUCCESS":
|
||||
print("\n################# Compile Success #################\n")
|
||||
return True
|
||||
|
||||
# Finalize Job
|
||||
job_id = job_id + 1
|
||||
finalize_job_json = dict()
|
||||
finalize_job_json["source_id"] = source_id
|
||||
finalize_job_json["job_id"] = job_id
|
||||
finalize_job_json["job_type"] = "Finalize"
|
||||
finalize_job_json["job_content"] = dict()
|
||||
res = tbe_compiler.job_handler(json.dumps(finalize_job_json))
|
||||
print("\n################# Compile Failed #################\n")
|
||||
return False
|
||||
|
||||
else:
|
||||
print("Final Compile Failed:{}".format(res))
|
||||
print("Process Logs:")
|
||||
for item in res_json["process_info"]:
|
||||
print("### LogLevel:" + str(item["level"]) + " " + item["message"])
|
||||
print("\n################# Compile Failed #################\n")
|
||||
return False
|
||||
|
||||
def compile(self):
|
||||
with open("op.info", "r") as op_json_file:
|
||||
op_json = json.load(op_json_file)
|
||||
res = self.tbe_compiler.job_handler(json.dumps(op_json))
|
||||
print("Compile result:" + res)
|
||||
compile_result_json = json.loads(res)
|
||||
self.source_id = compile_result_json["source_id"]
|
||||
self.job_id = compile_result_json["job_id"]
|
||||
if compile_result_json["status"] != "RUNNING":
|
||||
return TestCompile.process_finish_job(compile_result_json)
|
||||
|
||||
return self.process_running_job(compile_result_json)
|
||||
|
||||
def finilize(self):
|
||||
job_id = self.job_id + 1
|
||||
finalize_job_json = {"source_id": self.source_id,
|
||||
"job_id": job_id,
|
||||
"job_type": "Finalize",
|
||||
"job_content":
|
||||
{"SocInfo":
|
||||
{"op_debug_level": "3",
|
||||
"op_debug_dir": "./rank_0"
|
||||
}
|
||||
}
|
||||
}
|
||||
res = self.tbe_compiler.job_handler(json.dumps(finalize_job_json))
|
||||
print("Finalize result:{}".format(res))
|
||||
res_json = json.loads(res)
|
||||
if res_json["status"] == "Failed":
|
||||
if res_json["status"] == "FAILED":
|
||||
print("\n################# Finalize Failed #################\n")
|
||||
return False
|
||||
|
||||
print("\n################# Finalize Success #################\n")
|
||||
return True
|
||||
|
||||
def test_parallel_compilation(self):
|
||||
if not self.initialize():
|
||||
return False
|
||||
|
||||
if not self.compile():
|
||||
return False
|
||||
|
||||
return self.finilize()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
with open("op.info", "r") as op_json_file:
|
||||
op_json = json.load(op_json_file)
|
||||
test_parallel_compilation(json.dumps(op_json))
|
||||
TestCompile().test_parallel_compilation()
|
||||
|
|
Loading…
Reference in New Issue