!12192 terminate tbe process pool in separate thread, kill child of fork in 310 model converter

From: @zhoufeng54
Reviewed-by: @xu-yfei,@kisnwang
Signed-off-by: @xu-yfei
This commit is contained in:
mindspore-ci-bot 2021-02-22 19:26:57 +08:00 committed by Gitee
commit 8aba5d8f57
2 changed files with 30 additions and 7 deletions

View File

@ -13,6 +13,7 @@
# limitations under the License.
# ============================================================================
"""tbe process"""
import threading
import traceback
import multiprocessing
import subprocess
@ -137,11 +138,16 @@ class TbeProcess:
res = "TBEException", "ERROR: [MS_BUILD_PROCESS_NUM] type should be a int num, but got :" + process_num
return res
def close_pool(self):
    """
    Terminate the worker pool and wait for its processes to finish.

    exit() runs this method on a daemon thread, so it may be entered more
    than once. The pool reference is detached and the attribute is reset to
    None *before* the blocking terminate()/join() calls; this keeps the
    ``self.__pool is not None`` check in exit() valid afterwards (deleting
    the attribute would make that check raise AttributeError) and turns a
    repeated call into a no-op.
    """
    # Swap first so a concurrent or repeated caller sees None right away.
    pool, self.__pool = self.__pool, None
    if pool is None:
        # Nothing to close: never created or already released.
        return
    pool.terminate()
    pool.join()
def exit(self):
if self.__pool is not None:
self.__pool.terminate()
self.__pool.join()
del self.__pool
stop_thread = threading.Thread(target=self.close_pool)
stop_thread.daemon = True
stop_thread.start()
def start_compile_op(self, op_json):
"""

View File

@ -65,13 +65,30 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall
if (pid == 0) {
ChildProcess(child_process);
shared_memory.Detach();
MS_LOG_INFO << "Model converter: child process exit";
exit(0);
MS_LOG_INFO << "Model converter: child process sleep waiting for exit signal.";
while (1) {
// waiting for signal
}
} else { // parent process
ret = ParentProcess(parent_process);
shared_memory.Detach();
int status;
wait(&status);
MS_LOG_INFO << "Model converter: parent process kills child of fork.";
(void)kill(pid, SIGKILL);
constexpr uint32_t kMaxLoopCount = 5;
bool child_exited = false;
for (uint32_t i = 0; i < kMaxLoopCount; ++i) {
int status;
if (waitpid(pid, &status, WNOHANG) == pid) {
MS_LOG(INFO) << "Child process " << pid << " exits success.";
child_exited = true;
break;
}
sleep(1);
}
if (!child_exited) {
MS_LOG(WARNING) << "Child process " << pid << " has been killed but waitpid failed.";
}
shared_memory.Destroy();
}
return ret;