Fixed core dump issue in BERT net

This commit is contained in:
Parastoo Ashtari 2021-07-12 15:48:43 -04:00
parent 15bb794956
commit cdd7a672bd
3 changed files with 33 additions and 2 deletions

View File

@ -622,6 +622,20 @@ void Debugger::CheckDatasetGraph() {
is_dataset_graph_ = false;
}
bool Debugger::CheckDatasetGraph(const KernelGraphPtr &graph_ptr) {
const auto &nodes = graph_ptr->execution_order();
for (const auto &node : nodes) {
auto node_name = AnfAlgo::GetCNodeName(node);
MS_LOG(INFO) << "node: " << GetKernelNodeName(node);
if (node_name == "GetNext" || node_name == "InitDataSetQueue") {
MS_LOG(INFO) << "Not enabling debugger for graph " << graph_ptr->graph_id() << ": found dataset graph node "
<< node_name;
return true;
}
}
return false;
}
GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const {
// convert kernel graph to debugger modelproto
ModelProto model = GetDebuggerFuncGraphProto(graph_ptr);
@ -1423,8 +1437,14 @@ void Debugger::UpdateStepNumGPU() {
}
// Calls debug_services_->EmptyCurrentTensor() when the device target is GPU and either the
// debugger is enabled or device::KernelRuntime::DumpDataEnabledIteration() returns true.
// If debug_services_ is null, an error is logged and nothing is cleared.
void Debugger::ClearCurrentData() {
  if ((device_target_ == kGPUDevice) && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration())) {
    // debug_services_ may not have been created; guard the call so a null pointer is never dereferenced.
    if (debug_services_) {
      debug_services_->EmptyCurrentTensor();
    } else {
      MS_LOG(ERROR) << "debug_services_ is nullptr";
    }
  }
}
bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
return debug_services_->TensorExistsInCurrent(tensor_name);

View File

@ -170,6 +170,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
// check if dump using debugger backend is enabled
bool CheckDebuggerDumpEnabled() const;
bool CheckDatasetGraph(const KernelGraphPtr &graph_ptr);
private:
// private constructor for singleton
Debugger();

View File

@ -27,6 +27,7 @@
#include "debug/debugger/debugger_utils.h"
#endif
using KernelGraph = mindspore::session::KernelGraph;
namespace mindspore {
namespace runtime {
@ -52,6 +53,14 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
}
} else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
#ifdef ENABLE_DEBUGGER
auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
// debugger is not enabled for dataset graphs
if (Debugger::GetInstance()->CheckDatasetGraph(kernel_graph)) {
// Call back to the from actor to process after debug finished.
Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
return;
}
auto debugger = Debugger::GetInstance();
if (debugger) {
std::string kernel_name = cnode->fullname_with_scope();