forked from mindspore-Ecosystem/mindspore
fixed core dump issue in bert net
This commit is contained in:
parent
15bb794956
commit
cdd7a672bd
|
@ -622,6 +622,20 @@ void Debugger::CheckDatasetGraph() {
|
||||||
is_dataset_graph_ = false;
|
is_dataset_graph_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Debugger::CheckDatasetGraph(const KernelGraphPtr &graph_ptr) {
|
||||||
|
const auto &nodes = graph_ptr->execution_order();
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
auto node_name = AnfAlgo::GetCNodeName(node);
|
||||||
|
MS_LOG(INFO) << "node: " << GetKernelNodeName(node);
|
||||||
|
if (node_name == "GetNext" || node_name == "InitDataSetQueue") {
|
||||||
|
MS_LOG(INFO) << "Not enabling debugger for graph " << graph_ptr->graph_id() << ": found dataset graph node "
|
||||||
|
<< node_name;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const {
|
GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const {
|
||||||
// convert kernel graph to debugger modelproto
|
// convert kernel graph to debugger modelproto
|
||||||
ModelProto model = GetDebuggerFuncGraphProto(graph_ptr);
|
ModelProto model = GetDebuggerFuncGraphProto(graph_ptr);
|
||||||
|
@ -1423,8 +1437,14 @@ void Debugger::UpdateStepNumGPU() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Debugger::ClearCurrentData() {
|
void Debugger::ClearCurrentData() {
|
||||||
if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
|
if ((device_target_ == kGPUDevice) && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration())) {
|
||||||
debug_services_->EmptyCurrentTensor();
|
if (debug_services_) {
|
||||||
|
debug_services_->EmptyCurrentTensor();
|
||||||
|
|
||||||
|
} else {
|
||||||
|
MS_LOG(ERROR) << "debug_services_ is nullptr";
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
|
bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
|
||||||
return debug_services_->TensorExistsInCurrent(tensor_name);
|
return debug_services_->TensorExistsInCurrent(tensor_name);
|
||||||
|
|
|
@ -170,6 +170,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
||||||
// check if dump using debugger backend is enabled
|
// check if dump using debugger backend is enabled
|
||||||
bool CheckDebuggerDumpEnabled() const;
|
bool CheckDebuggerDumpEnabled() const;
|
||||||
|
|
||||||
|
// check if the given kernel graph contains a dataset node (GetNext or InitDataSetQueue)
bool CheckDatasetGraph(const KernelGraphPtr &graph_ptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// private constructor for singleton
|
// private constructor for singleton
|
||||||
Debugger();
|
Debugger();
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include "debug/debugger/debugger_utils.h"
|
#include "debug/debugger/debugger_utils.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
using KernelGraph = mindspore::session::KernelGraph;
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace runtime {
|
namespace runtime {
|
||||||
|
|
||||||
|
@ -52,6 +53,14 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
|
||||||
}
|
}
|
||||||
} else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
|
} else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
|
||||||
#ifdef ENABLE_DEBUGGER
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
|
||||||
|
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||||
|
// debugger is not enabled for dataset graphs
|
||||||
|
if (Debugger::GetInstance()->CheckDatasetGraph(kernel_graph)) {
|
||||||
|
// Call back to the from actor to process after debug finished.
|
||||||
|
Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
|
||||||
|
return;
|
||||||
|
}
|
||||||
auto debugger = Debugger::GetInstance();
|
auto debugger = Debugger::GetInstance();
|
||||||
if (debugger) {
|
if (debugger) {
|
||||||
std::string kernel_name = cnode->fullname_with_scope();
|
std::string kernel_name = cnode->fullname_with_scope();
|
||||||
|
|
Loading…
Reference in New Issue