forked from mindspore-Ecosystem/mindspore
!23732 Fix code self-check issues for online dbg in master
Merge pull request !23732 from TinaMengtingZhang/code_self_check_master
commit 38bab297ec

@@ -1863,6 +1863,7 @@ void AscendSession::LoadGraphsToDbg(NotNull<KernelGraphPtr> graph,
   MS_LOG(INFO) << "Start to do LoadGraphsToDbg in graph: " << graph->graph_id();
+  MS_EXCEPTION_IF_NULL(debugger_);
   debugger_->LoadGraphs(graph);
   MS_LOG(INFO) << "graph_sum_: " << graph_sum_;
   for (auto &child_graph : graph->child_graph_order()) {

@@ -161,6 +161,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
     debugger_ = Debugger::GetInstance();
     auto ms_context = MsContext::GetInstance();
     MS_EXCEPTION_IF_NULL(ms_context);
+    MS_EXCEPTION_IF_NULL(debugger_);
     debugger_->Init(device_id_, ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET));
   }
 #endif
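
Both hunks above add an MS_EXCEPTION_IF_NULL guard before the debugger pointer is touched. As a rough sketch only (MindSpore's actual macro lives in its logging headers and differs in detail), a null-check macro of this shape looks like:

#include <sstream>
#include <stdexcept>

// Hypothetical stand-in for MS_EXCEPTION_IF_NULL: throw with the offending
// expression's text instead of dereferencing a null pointer.
#define EXCEPTION_IF_NULL(ptr)                            \
  do {                                                    \
    if ((ptr) == nullptr) {                               \
      std::ostringstream oss;                             \
      oss << "The pointer '" << #ptr << "' is null.";     \
      throw std::runtime_error(oss.str());                \
    }                                                     \
  } while (false)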

@@ -181,8 +181,8 @@ std::optional<std::string> Common::GetConfigFile(const std::string &env) {
   if (env.empty()) {
     MS_LOG(EXCEPTION) << "Invalid env";
   }
-  auto config_path_str = std::getenv(env.c_str());
-  if (config_path_str == nullptr) {
+  auto config_path_str = common::GetEnv(env);
+  if (config_path_str.empty()) {
     MS_LOG(ERROR) << "Please export env:" << env;
     return std::nullopt;
   }
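
This hunk swaps raw std::getenv for the project's common::GetEnv wrapper, so the failure check becomes .empty() on a std::string instead of a nullptr test on a char*. A minimal sketch of that pattern, with hypothetical names:

#include <cstdlib>
#include <optional>
#include <string>

// Wrap std::getenv so callers never see a raw, possibly-null char*.
std::string GetEnvVar(const std::string &name) {
  const char *value = std::getenv(name.c_str());
  return value == nullptr ? std::string() : std::string(value);
}

// Mirror of the GetConfigFile flow: a missing env var maps to std::nullopt.
std::optional<std::string> GetConfigPath(const std::string &env) {
  auto path = GetEnvVar(env);
  if (path.empty()) {
    return std::nullopt;  // caller decides how to report the missing variable
  }
  return path;
}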

@@ -130,7 +130,7 @@ void DumpJsonParser::CopyJsonToDir(uint32_t rank_id) {
   }
   auto dump_config_file = Common::GetConfigFile(kMindsporeDumpConfig);
   if (!dump_config_file.has_value()) {
-    MS_LOG(EXCEPTION) << "Get dump config file failed";
+    MS_LOG(EXCEPTION) << "Get dump config file failed.";
   }
   std::ifstream json_file(dump_config_file.value());
   if (async_dump_enabled_ || e2e_dump_enabled_) {

@@ -52,7 +52,6 @@ class DumpJsonParser {
   std::string path() const { return path_; }
   std::string iteration_string() const { return iteration_; }
   std::string net_name() const { return net_name_; }
-  uint32_t input_output() const { return input_output_; }
   uint32_t op_debug_mode() const { return op_debug_mode_; }
   bool trans_flag() const { return trans_flag_; }
   uint32_t cur_dump_iter() const { return cur_dump_iter_; }

@@ -411,14 +411,18 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
                                      std::vector<unsigned int> *root_graph_id) {
   std::lock_guard<std::mutex> lg(lock_);
   auto t1 = std::chrono::high_resolution_clock::now();
-  if (watchpoint_table_.empty()) return;
+  if (watchpoint_table_.empty()) {
+    return;
+  }
   // vector to store execution order of tensors hit
   std::vector<int> exec_order;
   std::vector<std::string> time_stamps;
   int tensor_list_size = tensor_list->size();
   uint64_t tensor_list_byte_size = 0;
   MS_LOG(INFO) << "tensor list size: " << tensor_list_size;
-  if (tensor_list_size == 0) return;
+  if (tensor_list_size == 0) {
+    return;
+  }
   // default value for number of threads
   const int default_thread_num = 16;
   int max_thread_num = default_thread_num;
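
The tail of this hunk sets up the thread cap (default_thread_num = 16) that CheckWatchpoints uses when scanning the tensor list. A self-contained sketch of such a capped fan-out, with hypothetical names and chunking, not the real implementation:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>

// CheckSlice stands in for the real per-tensor watchpoint evaluation.
void CheckSlice(const std::vector<int> &items, size_t begin, size_t end) {
  // ... evaluate watchpoints for items[begin, end) ...
}

void CheckAll(const std::vector<int> &items) {
  const size_t kDefaultThreadNum = 16;
  size_t thread_num = std::min(kDefaultThreadNum, items.size());
  if (thread_num == 0) {
    return;
  }
  size_t chunk = (items.size() + thread_num - 1) / thread_num;  // ceiling division
  std::vector<std::thread> workers;
  for (size_t t = 0; t < thread_num; ++t) {
    size_t begin = t * chunk;
    size_t end = std::min(begin + chunk, items.size());
    if (begin >= end) {
      break;
    }
    workers.emplace_back(CheckSlice, std::cref(items), begin, end);
  }
  for (auto &w : workers) {
    w.join();
  }
}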

@@ -1166,7 +1170,7 @@ void DebugServices::ReadNodesTensors(const std::vector<std::string> &name, std::
   tensor_loader_->SearchTensors(name, &result_list);

   for (auto result : result_list) {
-    if (!std::get<1>(result)) {
+    if (std::get<1>(result) == nullptr) {
      continue;
    }
    ret_name->push_back(std::get<0>(result));

@@ -1206,7 +1210,7 @@ bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr
 }

 bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const {
-  if (kernel && w_name.length() > 0) {
+  if (kernel != nullptr && w_name.length() > 0) {
     auto input_size = AnfAlgo::GetInputTensorNum(kernel);
     for (size_t j = 0; j < input_size; ++j) {
       auto input_kernel = kernel->input(j + 1);

@@ -1222,14 +1226,8 @@ bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNode
 }
 #endif

-void DebugServices::EmptyTensor() { tensor_loader_->EmptyTensor(); }
-
 std::vector<std::shared_ptr<TensorData>> DebugServices::GetTensor() const { return tensor_loader_->GetTensor(); }

-uint32_t DebugServices::GetTensorLoaderIterNum() const { return tensor_loader_->GetIterNum(); }
-
-void DebugServices::SetTensorLoaderIterNum(uint32_t iter_num) { tensor_loader_->set_iter_num(iter_num); }
-
 void DebugServices::EmptyCurrentTensor() { tensor_loader_->EmptyCurrentTensor(); }

 #ifdef ONLINE_DBG_MODE

@@ -1246,10 +1244,6 @@ bool DebugServices::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, boo
   return tensor_loader_->LoadNewTensor(tensor, keep_prev);
 }

-std::unordered_map<unsigned int, DebugServices::watchpoint_t> DebugServices::GetWatchpointTable() {
-  return watchpoint_table_;
-}
-
 void DebugServices::ResetLoadedTensors() {
   wp_id_cache_.clear();
   MS_LOG(INFO) << "Resetting loaded tensors";

@@ -1269,7 +1263,9 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::GetNodeTensor(const CNod
   for (size_t j = 0; j < output_size; ++j) {
     auto tensor_name_with_slot = kernel_name + ":" + std::to_string(j);
     auto tensor = tensor_loader_->GetTensor(tensor_name_with_slot);
-    if (tensor) result.push_back(tensor);
+    if (tensor != nullptr) {
+      result.push_back(tensor);
+    }
   }
   return result;
 }

@@ -90,7 +90,9 @@ class DebugServices {
     bool hit;
     double_t actual_value;
     void Evaluate(double_t actualValue, std::string inequality_type) {
-      if (std::isnan(actualValue)) return;
+      if (std::isnan(actualValue)) {
+        return;
+      }

      actual_value = actualValue;
      // if cannot extract inequality type from watchpoint

@@ -164,17 +166,6 @@ class DebugServices {
              condition.type == SD_LT || condition.type == MAX_MIN_LT;
     }

-    bool min_max_enabled() const {
-      return condition.type == MAX_LT || condition.type == MAX_GT || condition.type == MIN_LT ||
-             condition.type == MIN_GT || condition.type == MAX_MIN_LT || condition.type == MAX_MIN_GT ||
-             (condition.type == INIT && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
-             (condition.type == TOO_LARGE && (!parameter_list[1].disabled || !parameter_list[2].disabled)) ||
-             (condition.type == TOO_SMALL && (!parameter_list[1].disabled || !parameter_list[2].disabled));
-    }
-    // inf or nan related condition set
-    bool inf_nan_enabled() const {
-      return condition.type == HAS_INF || condition.type == HAS_NAN || condition.type == GENERAL_OVERFLOW;
-    }
     // mean or sd related condition set
     bool mean_sd_enabled() const {
       return condition.type == MEAN_LT || condition.type == MEAN_GT || condition.type == SD_LT ||

@@ -185,7 +176,6 @@ class DebugServices {
       return (condition.type == TOO_LARGE && !parameter_list[0].disabled) ||
              (condition.type == TOO_SMALL && !parameter_list[0].disabled);
     }
-    bool zero_percentage_enabled() const { return condition.type == ALL_ZERO || condition.type == INIT; }

     bool tensor_update_ratio_mean_enabled() const {
       return condition.type == CHANGE_TOO_LARGE || condition.type == CHANGE_TOO_SMALL;

@@ -372,16 +362,11 @@ class DebugServices {

   bool IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const;
 #endif
-  void EmptyTensor();

   std::vector<std::shared_ptr<TensorData>> GetTensor() const;

   void AddAnalyzedTensorToCache(const bool recheck, const unsigned int id, const std::string &tensor_name);

-  uint32_t GetTensorLoaderIterNum() const;
-
-  void SetTensorLoaderIterNum(uint32_t iter_num);
-
   void EmptyCurrentTensor();

 #ifdef ONLINE_DBG_MODE

@@ -392,8 +377,6 @@ class DebugServices {

   bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);
-
-  std::unordered_map<unsigned int, watchpoint_t> GetWatchpointTable();

   void ResetLoadedTensors();
 #ifdef ONLINE_DBG_MODE
   std::vector<std::shared_ptr<TensorData>> GetNodeTensor(const CNodePtr &kernel);

@@ -291,6 +291,7 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
   }
 }
 void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
+  MS_EXCEPTION_IF_NULL(graph_ptr);
   // access lock for public method
   std::lock_guard<std::mutex> a_lock(access_lock_);
   CheckDatasetSinkMode();
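
PreExecute takes access_lock_ on entry, the idiom this file uses to serialize its public entry points. A generic sketch of that access-lock pattern, assuming a member mutex:

#include <mutex>

// Hypothetical class showing the access-lock idiom: every public method
// grabs the same mutex via RAII, so state changes are serialized and the
// lock is released on every return path.
class DebugSession {
 public:
  void SetStep(int step) {
    std::lock_guard<std::mutex> a_lock(access_lock_);
    step_ = step;
  }
  int step() const {
    std::lock_guard<std::mutex> a_lock(access_lock_);
    return step_;
  }

 private:
  mutable std::mutex access_lock_;  // mutable so const getters can lock
  int step_ = 0;
};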

@@ -379,7 +380,7 @@ uint32_t Debugger::GetRankID() {
 }
 void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
   uint32_t rank_id = GetRankID();
-  if (debugger_->DebuggerBackendEnabled()) {
+  if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     MS_EXCEPTION_IF_NULL(kernel_graph);
     (void)E2eDump::DumpParametersAndConstData(kernel_graph.get(), rank_id, debugger_.get());
   } else {
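
The `debugger_ && debugger_->...` guards added here short-circuit on a null shared_ptr before any call through it. A standalone sketch of the pattern, with hypothetical types:

#include <iostream>
#include <memory>

// Optional backend mirroring the guard above: test the shared_ptr itself
// before calling through it, so an uninitialized debugger simply disables
// the feature instead of crashing.
struct Backend {
  bool Enabled() const { return true; }
  void Dump() { std::cout << "dumping\n"; }
};

void MaybeDump(const std::shared_ptr<Backend> &backend) {
  if (backend && backend->Enabled()) {  // && short-circuits when backend is null
    backend->Dump();
  }
}

int main() {
  MaybeDump(nullptr);                      // safely does nothing
  MaybeDump(std::make_shared<Backend>());  // prints "dumping"
}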

@@ -388,7 +389,7 @@ void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
 }

 void Debugger::DumpSingleNode(const CNodePtr &node, uint32_t graph_id) {
-  if (debugger_->DebuggerBackendEnabled()) {
+  if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     uint32_t rank_id = GetRankID();
     (void)E2eDump::DumpSingleNodeData(node, graph_id, rank_id, debugger_.get());
   }

@@ -429,8 +430,10 @@ void Debugger::PostExecuteGraphDebugger() {
     return;
   }
   // LoadParametersAndConst for all the graphs
-  for (auto graph : graph_ptr_list_) {
-    debugger_->LoadParametersAndConst(graph);
+  if (debugger_) {
+    for (auto graph : graph_ptr_list_) {
+      debugger_->LoadParametersAndConst(graph);
+    }
   }
   // debug used for dump
   if (debugger_ && debugger_->CheckDebuggerDumpEnabled()) {

@@ -453,7 +456,7 @@ void Debugger::PostExecute() {
   if (pipeline::GraphExecutorPy::GetDebugTerminate()) {
     return;
   }
-  if (debugger_->DebuggerBackendEnabled()) {
+  if (debugger_ && debugger_->DebuggerBackendEnabled()) {
     // analyze tensor data and send the watchpoints been hit
     if (debugger_enabled_ && !is_dataset_graph_) {
       if (device_target_ != kGPUDevice) {

@@ -516,17 +519,8 @@ void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
   }
 }

-void Debugger::PostDebugOp() {
-  // access lock for public method
-  std::lock_guard<std::mutex> a_lock(access_lock_);
-  // suspend if debugger is enabled
-  if (debugger_enabled_ && !is_dataset_graph_) {
-    MS_LOG(INFO) << "Debugger suspend at debug_op";
-    CommandLoop();
-  }
-}
-
 void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) {
+  MS_EXCEPTION_IF_NULL(graph_ptr);
   if (graph_ptr_ != graph_ptr) {
     MS_LOG(INFO) << "LoadGraphs Debugger got new graph: " << graph_ptr->graph_id();
     // save new graph_ptr

@@ -547,6 +541,7 @@ void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) {

 // In single graph cases, check single graph ptr
 void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
+  MS_EXCEPTION_IF_NULL(graph_ptr);
   if (graph_ptr_ != graph_ptr) {
     MS_LOG(INFO) << "CheckGraphPtr Debugger got new graph: " << graph_ptr->graph_id();
     // save new graph_ptr

@@ -566,6 +561,7 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {

 void Debugger::CheckDatasetGraph() {
   // print parameter node names
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   const auto &params = graph_ptr_->inputs();
   for (const auto &param : params) {
     MS_LOG(INFO) << "param: " << GetKernelNodeName(param);

@@ -602,6 +598,7 @@ void Debugger::SendHeartbeat(int32_t period) {

   SetEnableHeartbeat(CheckDebuggerEnabled());
   while (enable_heartbeat_) {
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->SendHeartbeat(heartbeat);

     if (reply.status() != reply.OK) {
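
SendHeartbeat pings the MindInsight server in a loop while enable_heartbeat_ is set. A generic, gRPC-free sketch of such a loop, names hypothetical:

#include <atomic>
#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

std::atomic<bool> enable_heartbeat{true};

bool SendPing() { return true; }  // stand-in for the real transport call

// Ping the peer every `period` seconds until the enable flag is cleared
// or the peer stops acknowledging.
void HeartbeatLoop(int32_t period) {
  while (enable_heartbeat.load()) {
    if (!SendPing()) {
      std::cerr << "heartbeat not acknowledged, stopping\n";
      enable_heartbeat.store(false);
      break;
    }
    std::this_thread::sleep_for(std::chrono::seconds(period));
  }
}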

@@ -624,6 +621,7 @@ void Debugger::SendHeartbeat(int32_t period) {
 void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
   if (SendMetadata(true)) {
     // send graph to Mindinsight server
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->SendGraph(graph_proto);
     if (reply.status() != reply.OK) {
       MS_LOG(ERROR) << "Error: SendGraph failed";

@@ -635,6 +633,7 @@ void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {

 bool Debugger::SendMetadata(bool version_check) {
   // prepare metadata
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
   Metadata metadata;
   metadata.set_device_name(device_name);

@@ -647,6 +646,7 @@ bool Debugger::SendMetadata(bool version_check) {
   // set graph munber to not_dataset_graph_sum_
   metadata.set_graph_num(not_dataset_graph_sum_);

+  MS_EXCEPTION_IF_NULL(grpc_client_);
   EventReply reply_metadata = grpc_client_->SendMetadata(metadata);

   bool ret = false;

@@ -681,6 +681,7 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot
   if (!SendMetadata(true)) {
     return;
   }
+  MS_EXCEPTION_IF_NULL(grpc_client_);
   // send multiple graphs to mindinght server
   // split graph into chunks if one graph is larger than chunk size
   std::list<Chunk> chunked_graph_proto_list;
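
The comments above describe splitting a serialized graph into chunks before sending. A generic sketch of fixed-size chunking (the 3 MB constant is illustrative, not MindSpore's actual chunk size):

#include <string>
#include <vector>

// Split a serialized proto into fixed-size chunks so no single message
// exceeds the transport limit.
std::vector<std::string> ChunkSerializedGraph(const std::string &serialized) {
  const size_t kChunkSize = 3 * 1024 * 1024;  // illustrative limit
  std::vector<std::string> chunks;
  for (size_t offset = 0; offset < serialized.size(); offset += kChunkSize) {
    chunks.push_back(serialized.substr(offset, kChunkSize));
  }
  if (chunks.empty()) {
    chunks.emplace_back();  // always send at least one (possibly empty) chunk
  }
  return chunks;
}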

@@ -716,6 +717,7 @@ void Debugger::SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_prot

 void Debugger::CommandLoop() {
   // prepare metadata
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
   Metadata metadata;


@@ -732,6 +734,7 @@ void Debugger::CommandLoop() {

   while (!run) {
     // wait for command
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->WaitForCommand(metadata);
     if (reply.status() != reply.OK) {
       MS_LOG(ERROR) << "Error: WaitForCommand failed";

@@ -885,6 +888,7 @@ void Debugger::ViewValueLevel(const EventReply &reply) {
     }
     MS_LOG(INFO) << "tensor dtype: " << tensor.data_type();
   }
+  MS_EXCEPTION_IF_NULL(grpc_client_);
   EventReply send_tensors_reply = grpc_client_->SendTensors(tensors);
   if (send_tensors_reply.status() != debugger::EventReply::OK) {
     MS_LOG(ERROR) << "Error: SendTensors failed";

@@ -1127,6 +1131,7 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints(const std::string &watchnode
 void Debugger::SendWatchpoints(const std::list<WatchpointHit> &points) {
   // send info about watchpoint
   if (!points.empty()) {
+    MS_EXCEPTION_IF_NULL(grpc_client_);
     EventReply reply = grpc_client_->SendWatchpointHits(points);
     if (reply.status() != reply.OK) {
       MS_LOG(ERROR) << "Error: SendWatchpointHits failed";

@@ -1141,16 +1146,6 @@ bool Debugger::DumpTensorToFile(const std::string &tensor_name, bool trans_flag,
                                 device_type, addr_format, slot);
 }

-bool Debugger::DebugServicesIsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel) const {
-  return debug_services_.get()->IsWatchPoint(kernel_name, kernel);
-}
-
-void Debugger::EmptyTensor() { debug_services_.get()->EmptyTensor(); }
-
-void Debugger::SetTensorLoaderIterNum(uint32_t iter_num) { debug_services_.get()->SetTensorLoaderIterNum(iter_num); }
-
-uint32_t Debugger::GetTensorLoaderIterNum() const { return debug_services_.get()->GetTensorLoaderIterNum(); }
-
 bool Debugger::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev) {
   return debug_services_.get()->LoadNewTensor(tensor, keep_prev);
 }

@@ -1273,14 +1268,6 @@ void Debugger::SetCurNode(const std::string &cur_name) {

 std::string Debugger::run_level() const { return run_level_; }

-void Debugger::SetStepNum(int32_t cur_num_step) {
-  // access lock for public method
-  std::lock_guard<std::mutex> a_lock(access_lock_);
-  num_step_ = cur_num_step;
-}
-
-int32_t Debugger::step_num() const { return num_step_; }
-
 void Debugger::SetTrainingDone(bool training_done) { training_done_ = training_done; }

 bool Debugger::CheckPort(const std::string &port) const {

@@ -1377,6 +1364,7 @@ void Debugger::LoadParametersAndConst() {
 void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) {
   if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
   // load parameters
   MS_LOG(INFO) << "Start to load Parameters for graph " << graph->graph_id() << ".";
   const auto &parameters = graph_ptr_->inputs();

@@ -1432,6 +1420,8 @@ void Debugger::LoadGraphOutputs() {
 }

 void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(debugger_);
   // update step number if we are processing the first graph (to support multigraph)
   if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()) &&
       (graph->graph_id() == debugger_->GetFirstRunGraphId())) {

@@ -102,21 +102,10 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

   void PostExecuteNode(const CNodePtr &kernel, bool last_kernel);

-  // suspend the execution after a debug_op
-  void PostDebugOp();
-
   bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
                         const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type,
                         TypeId device_type, const std::string &addr_format, size_t slot) const;

-  bool DebugServicesIsWatchPoint(const std::string &kernel_name, const CNodePtr &kernel = nullptr) const;
-
-  void EmptyTensor();
-
-  void SetTensorLoaderIterNum(uint32_t iter_num);
-
-  uint32_t GetTensorLoaderIterNum() const;
-
   bool LoadNewTensor(const std::shared_ptr<TensorData> &tensor, bool keep_prev);

   bool debugger_enabled() const;

@@ -129,10 +118,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

   std::string run_level() const;

-  void SetStepNum(int32_t cur_num_step);
-
-  int32_t step_num() const;
-
   // check if any feature that uses the debugger backend is enabled
   bool DebuggerBackendEnabled() const;


@@ -291,8 +276,6 @@ class Debugger : public std::enable_shared_from_this<Debugger> {

 using DebuggerPtr = std::shared_ptr<Debugger>;
 // get debugger ModelProto
-std::string GetDebuggerFuncGraphProtoString(const FuncGraphPtr &func_graph);
-
 ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph);

 // for getting proto DataType from Type of Tensor

@@ -506,11 +506,6 @@ void DebuggerProtoExporter::ExportValueNodes(const std::map<AnfNodePtr, size_t>

 void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(debugger::IR_VERSION); }

-std::string GetDebuggerFuncGraphProtoString(const FuncGraphPtr &func_graph) {
-  DebuggerProtoExporter exporter;
-  return exporter.GetFuncGraphProtoString(func_graph);
-}
-
 debugger::ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph) {
   DebuggerProtoExporter exporter;
   return exporter.GetFuncGraphProto(func_graph);

@@ -120,10 +120,6 @@ class TensorLoader {
     return nullptr;
   }

-  uint32_t GetIterNum() const { return iter_num_; }
-
-  std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map_; }
-
   std::shared_ptr<TensorData> GetPrevTensor(const std::string &tensor_name) {
     if (tensor_list_map_.find(tensor_name + ":prev") != tensor_list_map_.end()) {
       return tensor_list_map_[tensor_name + ":prev"];
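
GetPrevTensor calls find() before reading, so a missing "<name>:prev" key is never default-inserted by operator[]. A minimal sketch of that lookup pattern:

#include <map>
#include <memory>
#include <string>

struct TensorData {};  // stand-in for the real type

// Look up "<name>:prev" without inserting a default entry on a miss;
// prefer find() over operator[] on read paths.
std::shared_ptr<TensorData> GetPrev(const std::map<std::string, std::shared_ptr<TensorData>> &m,
                                    const std::string &name) {
  auto it = m.find(name + ":prev");
  return it != m.end() ? it->second : nullptr;
}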

@@ -152,8 +148,6 @@ class TensorLoader {

   void EmptyCurrentTensor() { tensor_list_map_.clear(); }

-  void set_iter_num(uint32_t iter_num) { this->iter_num_ = iter_num; }
-
   bool EnableMemoryControl() { return mem_total_ > 0; }

   void AppendToCacheEvictQueue(const std::string &tensor_name) {

@@ -792,6 +792,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
       gpu_kernel->PostExecute();
     }
 #ifdef ENABLE_DEBUGGER
+    MS_EXCEPTION_IF_NULL(debugger_);
     // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
     LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
                    dump_enabled, kernel == last_kernel);

@@ -802,6 +803,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
     if (!UpdateMemorySwapTask(kernel, mock, profiling)) {
 #ifdef ENABLE_DEBUGGER
       if (!mock) {
+        MS_EXCEPTION_IF_NULL(debugger_);
         // invalidate current data collected by the debugger
         debugger_->ClearCurrentData();
       }