!6568 fix the bug for sending suspend command

Merge pull request !6568 from yelihua/temp-dev
This commit is contained in:
mindspore-ci-bot 2020-09-20 14:38:32 +08:00 committed by Gitee
commit 5ae77f2d51
4 changed files with 32 additions and 5 deletions

View File

@ -371,7 +371,14 @@ void GPUSession::PostIterationDbg(const std::shared_ptr<KernelGraph> &kernel_gra
}
void GPUSession::PreLoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
// check the dump_enabled and dataset_sink_mode
bool dump_enabled = DumpDataEnabledIteration();
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (dump_enabled && context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
MS_EXCEPTION(NotSupportError) << "Don't support set dataset_sink_mode to True when using e2e_dump";
}
if (!(debugger_ && (debugger_->debugger_enabled() || dump_enabled))) {
return;
}

View File

@ -283,12 +283,15 @@ void Debugger::PostExecuteNode() {
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
// if kernel is watchpoint,and get hit. suspend.
bool hit_empty_flag = true;
if (is_watchpoint) {
auto hits = CheckWatchpoints(cur_name_);
if (!hits.empty()) {
SendWatchpointsAndSuspend(hits);
hit_empty_flag = false;
}
} else if (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) {
}
if (hit_empty_flag && run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_)) {
// if kernel is not watchpoint and is next_to or continue_to node, suspend
CommandLoop();
}
@ -405,7 +408,9 @@ void Debugger::CommandLoop() {
MS_LOG(ERROR) << "Error: WaitForCommand failed";
num_wait_fail++;
if (num_wait_fail > max_num_wait_fail) {
MS_LOG(ERROR) << "Maximum number of WaitForCommand retry reached: exiting training session";
MS_LOG(ERROR) << "Maximum number of WaitForCommand retry reached: exiting training session.";
MS_LOG(ERROR) << "Failed to connect to MindInsight debugger server. Please check the config "
"of debugger host and port.";
Exit();
}
MS_LOG(ERROR) << "Number of consecutive WaitForCommand fail:" << num_wait_fail << "; Retry after "
@ -417,11 +422,11 @@ void Debugger::CommandLoop() {
// get type of the command in reply
DebuggerCommand cmd = GetCommand(reply);
if (cmd == DebuggerCommand::kUnknownCMD) {
MS_LOG(DEBUG) << "Debug: debugger recieved unknown command";
MS_LOG(DEBUG) << "Debug: debugger received unknown command";
continue;
}
MS_LOG(INFO) << "recieved command: ";
MS_LOG(INFO) << "received command: ";
switch (cmd) {
case DebuggerCommand::kUnknownCMD:
MS_LOG(INFO) << "UnknownCMD";

View File

@ -13,11 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/gpu/data_saver.h"
#include <fstream>
#include <numeric>
#include "sys/stat.h"
#include "utils/log_adapter.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace profiler {
@ -183,6 +184,7 @@ void DataSaver::WriteOpType(const std::string &saver_base_dir) {
ofs << op_type_info.second << std::endl;
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
@ -199,6 +201,7 @@ void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
ofs << op_detail << std::endl;
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
@ -232,7 +235,9 @@ void DataSaver::WriteActivity(const std::string &saver_base_dir) {
}
}
ofs.close();
ChangeFileMode(file_path);
activity_timestamp_ofs.close();
ChangeFileMode(timestamp_file_path);
MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
}
}
@ -254,6 +259,14 @@ void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
ofs << std::endl;
}
ofs.close();
ChangeFileMode(file_path);
}
// Restrict the permissions of `file_path` to owner read/write only
// (S_IRUSR | S_IWUSR). This is best-effort: if chmod() reports failure (-1),
// the failure is only logged and nothing is propagated to the caller.
void DataSaver::ChangeFileMode(const std::string &file_path) {
  const int chmod_ret = chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR);
  if (chmod_ret != -1) {
    return;
  }
  MS_LOG(INFO) << "Modify file:" << file_path << " to rw fail.";
}
} // namespace gpu
} // namespace profiler

View File

@ -145,6 +145,8 @@ class DataSaver {
void WriteOpTimestamp(const std::string &saver_base_dir);
void ChangeFileMode(const std::string &file_path);
std::string device_id_;
AllActivityInfos activity_infos_;
OpTypeInfos op_type_infos_;