forked from mindspore-Ecosystem/mindspore
!6668 [Data Dump] [master] Fix asynchronous data dump failure
Merge pull request !6668 from caifubi/dump
This commit is contained in:
commit
d3bd1cb5b8
|
@ -326,6 +326,11 @@ add_library(inference SHARED
|
|||
# Link the inference library: the `mindspore` archive is wrapped in
# --whole-archive / --no-whole-archive; the remaining libraries are linked normally.
target_link_libraries(inference
    PRIVATE
        ${PYTHON_LIBRARIES}
        ${SECUREC_LIBRARY}
        -Wl,--whole-archive
        mindspore
        -Wl,--no-whole-archive
        mindspore_gvar
        mindspore::protobuf)
|
||||
|
||||
# When ENABLE_D is set, both `_c_expression` and `inference` link the static
# adump server archive located under ${ASCEND_PATH}.
if (ENABLE_D)
    target_link_libraries(_c_expression
        PRIVATE
            ${ASCEND_PATH}/fwkacllib/lib64/libadump_server.a)
    target_link_libraries(inference
        PRIVATE
            ${ASCEND_PATH}/fwkacllib/lib64/libadump_server.a)
endif()
|
||||
|
||||
# When ENABLE_CPU is set, the inference library also links the dnnl and mkldnn targets.
if (ENABLE_CPU)
    target_link_libraries(inference
        PRIVATE
            mindspore::dnnl
            mindspore::mkldnn)
endif ()
|
||||
|
@ -339,4 +344,3 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|||
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include "debug/data_dump/e2e_dump_util.h"
|
||||
#include "debug/anf_ir_dump.h"
|
||||
#include "debug/dump_proto.h"
|
||||
#include "toolchain/adx_datadump_server.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace session {
|
||||
|
@ -420,6 +421,16 @@ void AscendSession::SelectKernel(const KernelGraph &kernel_graph) const {
|
|||
MS_LOG(INFO) << "Finish!";
|
||||
}
|
||||
|
||||
void DumpInit() {
|
||||
auto &json_parser = DumpJsonParser::GetInstance();
|
||||
json_parser.Parse();
|
||||
if (json_parser.async_dump_enabled()) {
|
||||
if (AdxDataDumpServerInit() != 0) {
|
||||
MS_LOG(EXCEPTION) << "Adx data dump server init failed";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AscendSession::InitRuntimeResource() {
|
||||
MS_LOG(INFO) << "Start!";
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
|
||||
|
@ -427,7 +438,7 @@ void AscendSession::InitRuntimeResource() {
|
|||
if (!runtime_instance->Init()) {
|
||||
MS_LOG(EXCEPTION) << "Kernel runtime init error.";
|
||||
}
|
||||
DumpJsonParser::GetInstance().Parse();
|
||||
DumpInit();
|
||||
MS_LOG(INFO) << "Finish!";
|
||||
}
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ void DumpJsonParser::ParseAsyncDumpSetting(const nlohmann::json &content) {
|
|||
}
|
||||
|
||||
void DumpJsonParser::ParseE2eDumpSetting(const nlohmann::json &content) {
|
||||
auto e2e_dump_setting = CheckJsonKeyExist(content, kE2eDumpSettings);
|
||||
auto e2e_dump_setting = content.find(kE2eDumpSettings);
|
||||
if (e2e_dump_setting == content.end()) {
|
||||
MS_LOG(INFO) << "No e2e_dump_settings";
|
||||
return;
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "runtime/device/ascend/ascend_memory_manager.h"
|
||||
#include "debug/tensor_load.h"
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#include "toolchain/adx_datadump_server.h"
|
||||
#include "utils/shape_utils.h"
|
||||
#ifdef MEM_REUSE_DEBUG
|
||||
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
|
||||
|
@ -169,6 +170,14 @@ bool AscendKernelRuntime::NeedDestroyHccl() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void AsyncDataDumpUninit() {
|
||||
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
|
||||
if (AdxDataDumpServerUnInit() != 0) {
|
||||
MS_LOG(ERROR) << "Adx data dump server uninit failed";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AscendKernelRuntime::ReleaseDeviceRes() {
|
||||
MS_LOG(INFO) << "Ascend finalize start";
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
|
@ -184,6 +193,8 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
|
|||
// release ge runtime
|
||||
ClearGraphModelMap();
|
||||
|
||||
AsyncDataDumpUninit();
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto ret = rtSetDevice(context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "runtime/mem.h"
|
||||
#include "runtime/rt_model.h"
|
||||
#include "runtime/stream.h"
|
||||
#include "toolchain/adx_datadump_server.h"
|
||||
|
||||
// Stub: the `event` argument is not used; always returns RT_ERROR_NONE.
rtError_t rtEventSynchronize(rtEvent_t event) {
  return RT_ERROR_NONE;
}
|
||||
|
||||
|
@ -141,3 +142,7 @@ rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback) { return RT_ERROR_NON
|
|||
// Stub: none of the arguments are used; always returns RT_ERROR_NONE.
RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList) {
  return RT_ERROR_NONE;
}
|
||||
|
||||
// Stub: none of the arguments are used; always returns RT_ERROR_NONE.
RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList) {
  return RT_ERROR_NONE;
}
|
||||
|
||||
// Stub: performs no work and reports success (0).
int AdxDataDumpServerInit() {
  constexpr int kStubSuccess = 0;
  return kStubSuccess;
}
|
||||
|
||||
// Stub: performs no work and reports success (0).
int AdxDataDumpServerUnInit() {
  constexpr int kStubSuccess = 0;
  return kStubSuccess;
}
|
||||
|
|
Loading…
Reference in New Issue