Fix online debugger terminating with exit code 1 on a user-requested exit (issue: I3YAPB)

This commit is contained in:
TinaMengtingZhang 2021-09-21 17:53:01 -04:00
parent b6f228cc48
commit dbed33a2ad
4 changed files with 15 additions and 6 deletions

View File

@ -768,7 +768,7 @@ void Debugger::CommandLoop() {
break; break;
case DebuggerCommand::kExitCMD: case DebuggerCommand::kExitCMD:
MS_LOG(INFO) << "ExitCMD"; MS_LOG(INFO) << "ExitCMD";
Exit(); Exit(true);
// Used for debugger termination // Used for debugger termination
run = true; run = true;
break; break;
@ -1077,11 +1077,11 @@ std::list<TensorSummary> Debugger::LoadTensorsStat(const ProtoVector<TensorProto
return tensor_summary_list; return tensor_summary_list;
} }
void Debugger::Exit() { void Debugger::Exit(bool exit_success) {
// debugger will notify main thread to exit because main thread can only exit at step boundary. // debugger will notify main thread to exit because main thread can only exit at step boundary.
MS_LOG(INFO) << "Exit Debugger"; MS_LOG(INFO) << "Exit Debugger";
SetEnableHeartbeat(false); SetEnableHeartbeat(false);
pipeline::GraphExecutorPy::DebugTerminate(true); pipeline::GraphExecutorPy::DebugTerminate(true, exit_success);
} }
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel, std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel,

View File

@ -223,7 +223,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
std::list<TensorSummary> LoadTensorsStat(const ProtoVector<TensorProto> &tensors) const; std::list<TensorSummary> LoadTensorsStat(const ProtoVector<TensorProto> &tensors) const;
// terminate training process // terminate training process
void Exit(); void Exit(bool exit_success = false);
// analyze tensors and check watchpoint conditions // analyze tensors and check watchpoint conditions
// return names of tensors and what condition they hit // return names of tensors and what condition they hit

View File

@ -107,6 +107,7 @@ GraphExecutorPyPtr GraphExecutorPy::executor_ = nullptr;
std::mutex GraphExecutorPy::instance_lock_; std::mutex GraphExecutorPy::instance_lock_;
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
bool GraphExecutorPy::debugger_terminate_ = false; bool GraphExecutorPy::debugger_terminate_ = false;
bool GraphExecutorPy::exit_success_ = false;
#endif #endif
std::unordered_map<abstract::AbstractBasePtrList, uint64_t, abstract::AbstractBasePtrListHasher, std::unordered_map<abstract::AbstractBasePtrList, uint64_t, abstract::AbstractBasePtrListHasher,
@ -1004,7 +1005,11 @@ void GraphExecutorPy::TerminateDebugger() {
if (debugger_terminate_) { if (debugger_terminate_) {
MS_LOG(INFO) << "Terminate debugger and clear resources!"; MS_LOG(INFO) << "Terminate debugger and clear resources!";
ClearResAtexit(); ClearResAtexit();
exit(1); if (exit_success_) {
exit(0);
} else {
exit(1);
}
} }
} }
#endif #endif

View File

@ -109,7 +109,10 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
static void ClearRes(); static void ClearRes();
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
static bool GetDebugTerminate() { return debugger_terminate_; } static bool GetDebugTerminate() { return debugger_terminate_; }
static void DebugTerminate(bool val) { debugger_terminate_ = val; } static void DebugTerminate(bool val, bool exit_success) {
debugger_terminate_ = val;
exit_success_ = exit_success;
}
void TerminateDebugger(); void TerminateDebugger();
#endif #endif
@ -131,6 +134,7 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
static std::mutex instance_lock_; static std::mutex instance_lock_;
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
static bool debugger_terminate_; static bool debugger_terminate_;
static bool exit_success_;
#endif #endif
std::map<std::string, py::dict> stra_dict_; std::map<std::string, py::dict> stra_dict_;
std::string phase_ = ""; std::string phase_ = "";