forked from mindspore-Ecosystem/mindspore
fixed core dump issue in bert net
This commit is contained in:
parent
15bb794956
commit
cdd7a672bd
|
@ -622,6 +622,20 @@ void Debugger::CheckDatasetGraph() {
|
|||
is_dataset_graph_ = false;
|
||||
}
|
||||
|
||||
bool Debugger::CheckDatasetGraph(const KernelGraphPtr &graph_ptr) {
|
||||
const auto &nodes = graph_ptr->execution_order();
|
||||
for (const auto &node : nodes) {
|
||||
auto node_name = AnfAlgo::GetCNodeName(node);
|
||||
MS_LOG(INFO) << "node: " << GetKernelNodeName(node);
|
||||
if (node_name == "GetNext" || node_name == "InitDataSetQueue") {
|
||||
MS_LOG(INFO) << "Not enabling debugger for graph " << graph_ptr->graph_id() << ": found dataset graph node "
|
||||
<< node_name;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const {
|
||||
// convert kernel graph to debugger modelproto
|
||||
ModelProto model = GetDebuggerFuncGraphProto(graph_ptr);
|
||||
|
@ -1423,8 +1437,14 @@ void Debugger::UpdateStepNumGPU() {
|
|||
}
|
||||
|
||||
void Debugger::ClearCurrentData() {
|
||||
if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
|
||||
if ((device_target_ == kGPUDevice) && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration())) {
|
||||
if (debug_services_) {
|
||||
debug_services_->EmptyCurrentTensor();
|
||||
|
||||
} else {
|
||||
MS_LOG(ERROR) << "debug_services_ is nullptr";
|
||||
}
|
||||
}
|
||||
}
|
||||
bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
|
||||
return debug_services_->TensorExistsInCurrent(tensor_name);
|
||||
|
|
|
@ -170,6 +170,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
// check if dump using debugger backend is enabled
|
||||
bool CheckDebuggerDumpEnabled() const;
|
||||
|
||||
bool CheckDatasetGraph(const KernelGraphPtr &graph_ptr);
|
||||
|
||||
private:
|
||||
// private constructor for singleton
|
||||
Debugger();
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "debug/debugger/debugger_utils.h"
|
||||
#endif
|
||||
|
||||
using KernelGraph = mindspore::session::KernelGraph;
|
||||
namespace mindspore {
|
||||
namespace runtime {
|
||||
|
||||
|
@ -52,6 +53,14 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
|
|||
}
|
||||
} else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
// debugger is not enabled for dataset graphs
|
||||
if (Debugger::GetInstance()->CheckDatasetGraph(kernel_graph)) {
|
||||
// Call back to the from actor to process after debug finished.
|
||||
Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
|
||||
return;
|
||||
}
|
||||
auto debugger = Debugger::GetInstance();
|
||||
if (debugger) {
|
||||
std::string kernel_name = cnode->fullname_with_scope();
|
||||
|
|
Loading…
Reference in New Issue