Fix online debugger terminating with exit code 1 (issue: I3YAPB)

This commit is contained in:
TinaMengtingZhang 2021-09-21 17:53:01 -04:00
parent b6f228cc48
commit dbed33a2ad
4 changed files with 15 additions and 6 deletions

View File

@ -768,7 +768,7 @@ void Debugger::CommandLoop() {
break;
case DebuggerCommand::kExitCMD:
MS_LOG(INFO) << "ExitCMD";
Exit();
Exit(true);
// Used for debugger termination
run = true;
break;
@ -1077,11 +1077,11 @@ std::list<TensorSummary> Debugger::LoadTensorsStat(const ProtoVector<TensorProto
return tensor_summary_list;
}
void Debugger::Exit() {
void Debugger::Exit(bool exit_success) {
// debugger will notify main thread to exit because main thread can only exit at step boundary.
MS_LOG(INFO) << "Exit Debugger";
SetEnableHeartbeat(false);
pipeline::GraphExecutorPy::DebugTerminate(true);
pipeline::GraphExecutorPy::DebugTerminate(true, exit_success);
}
std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode, const CNodePtr &kernel,

View File

@ -223,7 +223,7 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
std::list<TensorSummary> LoadTensorsStat(const ProtoVector<TensorProto> &tensors) const;
// terminate training process
void Exit();
void Exit(bool exit_success = false);
// analyze tensors and check watchpoint conditions
// return names of tensors and what condition they hit

View File

@ -107,6 +107,7 @@ GraphExecutorPyPtr GraphExecutorPy::executor_ = nullptr;
std::mutex GraphExecutorPy::instance_lock_;
#ifdef ENABLE_DEBUGGER
bool GraphExecutorPy::debugger_terminate_ = false;
bool GraphExecutorPy::exit_success_ = false;
#endif
std::unordered_map<abstract::AbstractBasePtrList, uint64_t, abstract::AbstractBasePtrListHasher,
@ -1004,7 +1005,11 @@ void GraphExecutorPy::TerminateDebugger() {
if (debugger_terminate_) {
MS_LOG(INFO) << "Terminate debugger and clear resources!";
ClearResAtexit();
exit(1);
if (exit_success_) {
exit(0);
} else {
exit(1);
}
}
}
#endif

View File

@ -109,7 +109,10 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
static void ClearRes();
#ifdef ENABLE_DEBUGGER
static bool GetDebugTerminate() { return debugger_terminate_; }
static void DebugTerminate(bool val) { debugger_terminate_ = val; }
static void DebugTerminate(bool val, bool exit_success) {
debugger_terminate_ = val;
exit_success_ = exit_success;
}
void TerminateDebugger();
#endif
@ -131,6 +134,7 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
static std::mutex instance_lock_;
#ifdef ENABLE_DEBUGGER
static bool debugger_terminate_;
static bool exit_success_;
#endif
std::map<std::string, py::dict> stra_dict_;
std::string phase_ = "";