forked from mindspore-Ecosystem/mindspore
Implementation for mindspore debugger
This commit is contained in:
parent
73f440a54d
commit
4c056855e0
|
@ -12,7 +12,7 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
|
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Werror -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
|
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
|
||||||
endif()
|
endif()
|
||||||
|
|
15
build.sh
15
build.sh
|
@ -25,7 +25,7 @@ usage()
|
||||||
echo "Usage:"
|
echo "Usage:"
|
||||||
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
|
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
|
||||||
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
||||||
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]"
|
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off]"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Options:"
|
echo "Options:"
|
||||||
echo " -d Debug mode"
|
echo " -d Debug mode"
|
||||||
|
@ -54,6 +54,7 @@ usage()
|
||||||
echo " -I Compile predict, default off"
|
echo " -I Compile predict, default off"
|
||||||
echo " -K Compile with AKG, default off"
|
echo " -K Compile with AKG, default off"
|
||||||
echo " -s Enable serving module, default off"
|
echo " -s Enable serving module, default off"
|
||||||
|
echo " -B Enable debugger, default off"
|
||||||
}
|
}
|
||||||
|
|
||||||
# check value of input is 'on' or 'off'
|
# check value of input is 'on' or 'off'
|
||||||
|
@ -94,8 +95,10 @@ checkopts()
|
||||||
PREDICT_PLATFORM=""
|
PREDICT_PLATFORM=""
|
||||||
ENABLE_AKG="on"
|
ENABLE_AKG="on"
|
||||||
ENABLE_SERVING="off"
|
ENABLE_SERVING="off"
|
||||||
|
ENABLE_DEBUGGER="off"
|
||||||
|
|
||||||
# Process the options
|
# Process the options
|
||||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:s' opt
|
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:' opt
|
||||||
do
|
do
|
||||||
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
|
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
|
||||||
case "${opt}" in
|
case "${opt}" in
|
||||||
|
@ -240,6 +243,11 @@ checkopts()
|
||||||
ENABLE_SERVING="on"
|
ENABLE_SERVING="on"
|
||||||
echo "enable serving"
|
echo "enable serving"
|
||||||
;;
|
;;
|
||||||
|
B)
|
||||||
|
check_on_off $OPTARG B
|
||||||
|
ENABLE_DEBUGGER="on"
|
||||||
|
echo "enable debugger"
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Unknown option ${opt}!"
|
echo "Unknown option ${opt}!"
|
||||||
usage
|
usage
|
||||||
|
@ -322,6 +330,9 @@ build_mindspore()
|
||||||
if [[ "X$ENABLE_SERVING" = "Xon" ]]; then
|
if [[ "X$ENABLE_SERVING" = "Xon" ]]; then
|
||||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON"
|
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON"
|
||||||
fi
|
fi
|
||||||
|
if [[ "X$ENABLE_DEBUGGER" = "Xon" ]]; then
|
||||||
|
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DEBUGGER=ON"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "${CMAKE_ARGS}"
|
echo "${CMAKE_ARGS}"
|
||||||
if [[ "X$INC_BUILD" = "Xoff" ]]; then
|
if [[ "X$INC_BUILD" = "Xoff" ]]; then
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
mindspore_add_pkg(absl
|
||||||
|
VER 20200225.2
|
||||||
|
LIBS absl_strings absl_throw_delegate absl_raw_logging_internal absl_int128 absl_bad_optional_access
|
||||||
|
URL https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz
|
||||||
|
MD5 73f2b6e72f1599a9139170c29482ddc4
|
||||||
|
CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=TRUE)
|
||||||
|
|
||||||
|
include_directories(${absl_INC})
|
||||||
|
|
||||||
|
add_library(mindspore::absl_strings ALIAS absl::absl_strings)
|
||||||
|
add_library(mindspore::absl_throw_delegate ALIAS absl::absl_throw_delegate)
|
||||||
|
add_library(mindspore::absl_raw_logging_internal ALIAS absl::absl_raw_logging_internal)
|
||||||
|
add_library(mindspore::absl_int128 ALIAS absl::absl_int128)
|
||||||
|
add_library(mindspore::absl_bad_optional_access ALIAS absl::absl_bad_optional_access)
|
|
@ -0,0 +1,12 @@
|
||||||
|
mindspore_add_pkg(c-ares
|
||||||
|
VER 1.15.0
|
||||||
|
LIBS cares
|
||||||
|
URL https://github.com/c-ares/c-ares/releases/download/cares-1_15_0/c-ares-1.15.0.tar.gz
|
||||||
|
MD5 d2391da274653f7643270623e822dff7
|
||||||
|
CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release
|
||||||
|
-DCARES_SHARED:BOOL=OFF
|
||||||
|
-DCARES_STATIC:BOOL=ON
|
||||||
|
-DCARES_STATIC_PIC:BOOL=ON)
|
||||||
|
|
||||||
|
include_directories(${c-ares_INC})
|
||||||
|
add_library(mindspore::cares ALIAS c-ares::cares)
|
|
@ -0,0 +1,110 @@
|
||||||
|
set(grpc_USE_STATIC_LIBS ON)
|
||||||
|
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||||
|
set(grpc_CXXFLAGS "-fstack-protector-all -Wno-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
|
||||||
|
elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||||
|
set(grpc_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -O2")
|
||||||
|
else()
|
||||||
|
set(grpc_CXXFLAGS "-fstack-protector-all -Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fvisibility=hidden -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(grpc_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
|
||||||
|
|
||||||
|
|
||||||
|
if (EXISTS ${protobuf_ROOT}/lib64)
|
||||||
|
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${protobuf_ROOT}/lib64/cmake/protobuf")
|
||||||
|
else()
|
||||||
|
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${protobuf_ROOT}/lib/cmake/protobuf")
|
||||||
|
endif()
|
||||||
|
message("grpc using Protobuf_DIR : " ${_FINDPACKAGE_PROTOBUF_CONFIG_DIR})
|
||||||
|
|
||||||
|
if (EXISTS ${absl_ROOT}/lib64)
|
||||||
|
set(_FINDPACKAGE_ABSL_CONFIG_DIR "${absl_ROOT}/lib64/cmake/absl")
|
||||||
|
else()
|
||||||
|
set(_FINDPACKAGE_ABSL_CONFIG_DIR "${absl_ROOT}/lib/cmake/absl")
|
||||||
|
endif()
|
||||||
|
message("grpc using absl_DIR : " ${_FINDPACKAGE_ABSL_CONFIG_DIR})
|
||||||
|
|
||||||
|
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
|
||||||
|
if (OPENSSL_ROOT_DIR)
|
||||||
|
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
mindspore_add_pkg(grpc
|
||||||
|
VER 1.27.3
|
||||||
|
LIBS grpc++ grpc gpr upb address_sorting
|
||||||
|
EXE grpc_cpp_plugin
|
||||||
|
URL https://github.com/grpc/grpc/archive/v1.27.3.tar.gz
|
||||||
|
MD5 0c6c3fc8682d4262dd0e5e6fabe1a7e2
|
||||||
|
CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release
|
||||||
|
-DgRPC_INSTALL:BOOL=ON
|
||||||
|
-DgRPC_BUILD_TESTS:BOOL=OFF
|
||||||
|
-DgRPC_PROTOBUF_PROVIDER:STRING=package
|
||||||
|
-DgRPC_PROTOBUF_PACKAGE_TYPE:STRING=CONFIG
|
||||||
|
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
|
||||||
|
-DgRPC_ZLIB_PROVIDER:STRING=package
|
||||||
|
-DZLIB_ROOT:PATH=${zlib_ROOT}
|
||||||
|
-DgRPC_ABSL_PROVIDER:STRING=package
|
||||||
|
-Dabsl_DIR:PATH=${_FINDPACKAGE_ABSL_CONFIG_DIR}
|
||||||
|
-DgRPC_CARES_PROVIDER:STRING=package
|
||||||
|
-Dc-ares_DIR:PATH=${c-ares_ROOT}/lib/cmake/c-ares
|
||||||
|
-DgRPC_SSL_PROVIDER:STRING=package
|
||||||
|
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
include_directories(${grpc_INC})
|
||||||
|
|
||||||
|
add_library(mindspore::grpc++ ALIAS grpc::grpc++)
|
||||||
|
|
||||||
|
# link other grpc libs
|
||||||
|
target_link_libraries(grpc::grpc++ INTERFACE grpc::grpc grpc::gpr grpc::upb grpc::address_sorting)
|
||||||
|
|
||||||
|
# link built dependencies
|
||||||
|
target_link_libraries(grpc::grpc++ INTERFACE mindspore::z)
|
||||||
|
target_link_libraries(grpc::grpc++ INTERFACE mindspore::cares)
|
||||||
|
target_link_libraries(grpc::grpc++ INTERFACE mindspore::absl_strings mindspore::absl_throw_delegate
|
||||||
|
mindspore::absl_raw_logging_internal mindspore::absl_int128 mindspore::absl_bad_optional_access)
|
||||||
|
|
||||||
|
# link system openssl
|
||||||
|
find_package(OpenSSL REQUIRED)
|
||||||
|
target_link_libraries(grpc::grpc++ INTERFACE OpenSSL::SSL OpenSSL::Crypto)
|
||||||
|
|
||||||
|
|
||||||
|
function(ms_grpc_generate c_var h_var)
|
||||||
|
if(NOT ARGN)
|
||||||
|
message(SEND_ERROR "Error: ms_grpc_generate() called without any proto files")
|
||||||
|
return()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(${c_var})
|
||||||
|
set(${h_var})
|
||||||
|
|
||||||
|
foreach(file ${ARGN})
|
||||||
|
get_filename_component(abs_file ${file} ABSOLUTE)
|
||||||
|
get_filename_component(file_name ${file} NAME_WE)
|
||||||
|
get_filename_component(file_dir ${abs_file} PATH)
|
||||||
|
file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir})
|
||||||
|
|
||||||
|
list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc")
|
||||||
|
list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h")
|
||||||
|
list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.cc")
|
||||||
|
list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.h")
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc"
|
||||||
|
"${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h"
|
||||||
|
"${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.cc"
|
||||||
|
"${CMAKE_BINARY_DIR}/proto/${file_name}.grpc.pb.h"
|
||||||
|
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto"
|
||||||
|
COMMAND protobuf::protoc --version
|
||||||
|
COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto
|
||||||
|
--grpc_out=${CMAKE_BINARY_DIR}/proto --plugin=protoc-gen-grpc=$<TARGET_FILE:grpc::grpc_cpp_plugin> ${abs_file}
|
||||||
|
DEPENDS protobuf::protoc grpc::grpc_cpp_plugin ${abs_file}
|
||||||
|
COMMENT "Running C++ gRPC compiler on ${file}" VERBATIM)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
|
||||||
|
set(${c_var} ${${c_var}} PARENT_SCOPE)
|
||||||
|
set(${h_var} ${${h_var}} PARENT_SCOPE)
|
||||||
|
|
||||||
|
endfunction()
|
|
@ -0,0 +1,9 @@
|
||||||
|
mindspore_add_pkg(zlib
|
||||||
|
VER 1.2.11
|
||||||
|
LIBS z
|
||||||
|
URL https://github.com/madler/zlib/archive/v1.2.11.tar.gz
|
||||||
|
MD5 0095d2d2d1f3442ce1318336637b695f
|
||||||
|
CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release)
|
||||||
|
|
||||||
|
include_directories(${zlib_INC})
|
||||||
|
add_library(mindspore::z ALIAS zlib::z)
|
|
@ -14,6 +14,16 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/eigen.cmake)
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
|
||||||
|
|
||||||
|
if (ENABLE_DEBUGGER)
|
||||||
|
# build dependencies of gRPC
|
||||||
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
|
||||||
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
|
||||||
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zlib.cmake)
|
||||||
|
# build gRPC
|
||||||
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/grpc.cmake)
|
||||||
|
endif()
|
||||||
|
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pybind11.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pybind11.cmake)
|
||||||
MESSAGE("go to link flatbuffers")
|
MESSAGE("go to link flatbuffers")
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
|
||||||
|
|
|
@ -17,6 +17,7 @@ option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF)
|
||||||
option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON)
|
option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON)
|
||||||
option(ENABLE_MPI "enable mpi" OFF)
|
option(ENABLE_MPI "enable mpi" OFF)
|
||||||
option(ENABLE_AKG "enable akg" OFF)
|
option(ENABLE_AKG "enable akg" OFF)
|
||||||
|
option(ENABLE_DEBUGGER "enable debugger" OFF)
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -112,3 +113,7 @@ endif()
|
||||||
if(ENABLE_DUMP_E2E)
|
if(ENABLE_DUMP_E2E)
|
||||||
add_compile_definitions(ENABLE_DUMP_E2E)
|
add_compile_definitions(ENABLE_DUMP_E2E)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(ENABLE_DEBUGGER)
|
||||||
|
add_compile_definitions(ENABLE_DEBUGGER)
|
||||||
|
endif()
|
||||||
|
|
|
@ -71,6 +71,17 @@ message("onnx proto path is :" ${ONNX_PROTO})
|
||||||
ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO})
|
ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO})
|
||||||
list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS})
|
list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS})
|
||||||
|
|
||||||
|
if (ENABLE_DEBUGGER)
|
||||||
|
# debugger: compile proto files
|
||||||
|
include_directories("${CMAKE_BINARY_DIR}/debug/debugger")
|
||||||
|
file(GLOB_RECURSE DEBUGGER_PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_graph.proto")
|
||||||
|
ms_protobuf_generate(DEBUGGER_PROTO_SRCS DEBUGGER_PROTO_HDRS ${DEBUGGER_PROTO_LIST})
|
||||||
|
file(GLOB_RECURSE DEBUGGER_GRPC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "debug/debugger/debug_grpc.proto")
|
||||||
|
ms_grpc_generate(DEBUGGER_GRPC_SRCS DEBUGGER_GRPC_HDRS ${DEBUGGER_GRPC_LIST})
|
||||||
|
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_PROTO_SRCS})
|
||||||
|
list(APPEND MINDSPORE_PROTO_LIST ${DEBUGGER_GRPC_SRCS})
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (ENABLE_DUMP_PROTO)
|
if (ENABLE_DUMP_PROTO)
|
||||||
include_directories(${CMAKE_BINARY_DIR})
|
include_directories(${CMAKE_BINARY_DIR})
|
||||||
file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto")
|
file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto")
|
||||||
|
@ -125,6 +136,14 @@ endforeach ()
|
||||||
|
|
||||||
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
||||||
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
|
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
|
||||||
|
|
||||||
|
target_link_libraries(proto_input mindspore::protobuf)
|
||||||
|
|
||||||
|
if (ENABLE_DEBUGGER)
|
||||||
|
# debugger: link grpc
|
||||||
|
target_link_libraries(proto_input mindspore::grpc++)
|
||||||
|
endif()
|
||||||
|
|
||||||
target_link_libraries(mindspore proto_input)
|
target_link_libraries(mindspore proto_input)
|
||||||
if (ENABLE_CPU AND ENABLE_MPI)
|
if (ENABLE_CPU AND ENABLE_MPI)
|
||||||
target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi)
|
target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi)
|
||||||
|
@ -217,6 +236,7 @@ if (USE_GLOG)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (ENABLE_DUMP_PROTO)
|
if (ENABLE_DUMP_PROTO)
|
||||||
|
message("add protobuf lib to c_expression")
|
||||||
target_link_libraries(_c_expression PRIVATE mindspore::protobuf)
|
target_link_libraries(_c_expression PRIVATE mindspore::protobuf)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,15 @@ set(_DEBUG_SRC_LIST
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/trace.cc"
|
"${CMAKE_CURRENT_SOURCE_DIR}/trace.cc"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (ENABLE_DEBUGGER)
|
||||||
|
list(APPEND _DEBUG_SRC_LIST
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/debugger.cc"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/grpc_client.cc"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/debugger/proto_exporter.cc"
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/debug_services.cc"
|
||||||
|
)
|
||||||
|
endif (ENABLE_DEBUGGER)
|
||||||
|
|
||||||
if (ENABLE_DUMP_E2E)
|
if (ENABLE_DUMP_E2E)
|
||||||
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
|
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
|
||||||
endif (ENABLE_DUMP_E2E)
|
endif (ENABLE_DUMP_E2E)
|
||||||
|
|
|
@ -0,0 +1,194 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "debug/debug_services.h"
|
||||||
|
namespace mindspore {
|
||||||
|
|
||||||
|
DebugServices::DebugServices() {
|
||||||
|
tensor_loader_ = new TensorLoader();
|
||||||
|
uint32_t iter_num = -1;
|
||||||
|
tensor_loader_->set_iter_num(iter_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugServices::DebugServices(const DebugServices &other) {
|
||||||
|
tensor_loader_ = other.tensor_loader_;
|
||||||
|
watchpoint_table = other.watchpoint_table;
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugServices &DebugServices::operator=(const DebugServices &other) {
|
||||||
|
if (this != &other) {
|
||||||
|
tensor_loader_ = other.tensor_loader_;
|
||||||
|
watchpoint_table = other.watchpoint_table;
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugServices::~DebugServices() { delete tensor_loader_; }
|
||||||
|
|
||||||
|
void DebugServices::add_watchpoint(unsigned int id, unsigned int watch_condition,
|
||||||
|
const std::vector<std::tuple<std::string, bool>> &check_node_list) {
|
||||||
|
std::lock_guard<std::mutex> lg(lock_);
|
||||||
|
|
||||||
|
watchpoint_t watchpoint_item;
|
||||||
|
|
||||||
|
watchpoint_item.id = id;
|
||||||
|
|
||||||
|
if (watch_condition == 0) {
|
||||||
|
watchpoint_item.conditions.nan.enabled = true;
|
||||||
|
} else if (watch_condition == 1) {
|
||||||
|
watchpoint_item.conditions.inf.enabled = true;
|
||||||
|
watchpoint_item.conditions.neg_inf.enabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
watchpoint_item.check_node_list = check_node_list;
|
||||||
|
|
||||||
|
watchpoint_table[id] = watchpoint_item;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugServices::remove_watchpoint(unsigned int id) {
|
||||||
|
std::lock_guard<std::mutex> lg(lock_);
|
||||||
|
watchpoint_table.erase(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugServices::check_watchpoints(std::vector<std::string> *name, std::vector<std::string> *slot,
|
||||||
|
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
|
||||||
|
std::vector<int> *condition, std::vector<unsigned int> *wacthpoint_id) {
|
||||||
|
std::lock_guard<std::mutex> lg(lock_);
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<TensorData>> tensor_list = tensor_loader_->GetTensor();
|
||||||
|
|
||||||
|
std::string current_tensor_name;
|
||||||
|
std::unordered_map<unsigned int, watchpoint_t> watchpoints_to_check_table;
|
||||||
|
|
||||||
|
for (std::size_t i = 0; i < tensor_list.size(); i++) {
|
||||||
|
current_tensor_name = tensor_list[i]->GetName();
|
||||||
|
mindspore::tensor::TensorPtr tensor_ptr = tensor_list[i]->GetTensor();
|
||||||
|
int tensor_data_type = tensor_ptr->data_type_c();
|
||||||
|
|
||||||
|
// check if we need to analyze this node and for which watchpoints we will check
|
||||||
|
// create a list of watchpoints to check
|
||||||
|
watchpoints_to_check_table.clear();
|
||||||
|
for (auto w_table_item : watchpoint_table) {
|
||||||
|
// if the watchpoint is checking for a nan or inf and the current tensor is not of a float type, then
|
||||||
|
// don't check the watchpoint for this tensor
|
||||||
|
if (std::get<1>(w_table_item).conditions.inf.enabled || std::get<1>(w_table_item).conditions.neg_inf.enabled ||
|
||||||
|
std::get<1>(w_table_item).conditions.nan.enabled) {
|
||||||
|
if (tensor_data_type != kNumberTypeFloat16 && tensor_data_type != kNumberTypeFloat &&
|
||||||
|
tensor_data_type != kNumberTypeFloat32 && tensor_data_type != kNumberTypeFloat64) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto check_node_list = std::get<1>(w_table_item).check_node_list;
|
||||||
|
|
||||||
|
for (auto check_node : check_node_list) {
|
||||||
|
std::string w_name = std::get<0>(check_node);
|
||||||
|
bool w_type = std::get<1>(check_node);
|
||||||
|
|
||||||
|
// check if the current node tensor name is included the watchpoint
|
||||||
|
std::string current_node_name = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
|
||||||
|
if ((w_type == true && (current_tensor_name.find(w_name) != string::npos || w_name == "*")) ||
|
||||||
|
(w_type == false && current_node_name == w_name)) {
|
||||||
|
watchpoints_to_check_table[w_table_item.second.id] = w_table_item.second;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if no watchpoints are valid for the current tensor
|
||||||
|
if (watchpoints_to_check_table.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// need to add support for float16 and float64, and other types when we support conditions beyond inf and nan
|
||||||
|
if (tensor_data_type != kNumberTypeFloat && tensor_data_type != kNumberTypeFloat32) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
float *start_addr = reinterpret_cast<float *>(tensor_ptr->data_c(false));
|
||||||
|
unsigned int num_elements = (tensor_ptr->data().nbytes()) / sizeof(float);
|
||||||
|
|
||||||
|
std::unordered_map<unsigned int, watchpoint_t>::iterator it_w_table_check;
|
||||||
|
std::vector<unsigned int> hit_encountered;
|
||||||
|
|
||||||
|
for (unsigned int index = 0; index < num_elements; index++) {
|
||||||
|
float x = start_addr[index];
|
||||||
|
it_w_table_check = watchpoints_to_check_table.begin();
|
||||||
|
|
||||||
|
while (it_w_table_check != watchpoints_to_check_table.end()) {
|
||||||
|
if ((it_w_table_check->second.conditions.inf.enabled || it_w_table_check->second.conditions.neg_inf.enabled) &&
|
||||||
|
isinf(x)) {
|
||||||
|
hit_encountered.push_back(it_w_table_check->second.id);
|
||||||
|
} else if (it_w_table_check->second.conditions.nan.enabled && isnan(x)) {
|
||||||
|
hit_encountered.push_back(it_w_table_check->second.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
++it_w_table_check;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hit_encountered.size()) {
|
||||||
|
for (auto it_hit_id = hit_encountered.begin(); it_hit_id != hit_encountered.end(); ++it_hit_id) {
|
||||||
|
std::string name_no_slot = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
|
||||||
|
name->push_back(name_no_slot);
|
||||||
|
|
||||||
|
slot->push_back(std::to_string(tensor_list[i]->GetSlot()));
|
||||||
|
data_ptr->push_back(reinterpret_cast<char *>(tensor_ptr->data_c(false)));
|
||||||
|
data_size->push_back(tensor_ptr->data().nbytes());
|
||||||
|
|
||||||
|
int condition_item = -1;
|
||||||
|
if (watchpoint_table[*it_hit_id].conditions.nan.enabled) {
|
||||||
|
condition_item = 0;
|
||||||
|
} else if (watchpoint_table[*it_hit_id].conditions.inf.enabled ||
|
||||||
|
watchpoint_table[*it_hit_id].conditions.neg_inf.enabled) {
|
||||||
|
condition_item = 1;
|
||||||
|
}
|
||||||
|
condition->push_back(condition_item);
|
||||||
|
|
||||||
|
wacthpoint_id->push_back(*it_hit_id);
|
||||||
|
|
||||||
|
watchpoints_to_check_table.erase(*it_hit_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
hit_encountered.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (watchpoints_to_check_table.empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebugServices::read_nodes_tensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
|
||||||
|
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
|
||||||
|
std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape) {
|
||||||
|
std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> result_list;
|
||||||
|
tensor_loader_->SearchTensors(name, &result_list);
|
||||||
|
|
||||||
|
for (auto result : result_list) {
|
||||||
|
if (!std::get<1>(result)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
ret_name->push_back(std::get<0>(result));
|
||||||
|
data_ptr->push_back(reinterpret_cast<char *>(std::get<1>(result)->GetTensor()->data_c(false)));
|
||||||
|
data_size->push_back(std::get<1>(result)->GetTensor()->data().nbytes());
|
||||||
|
dtype->push_back(std::get<1>(result)->GetTensor()->Dtype());
|
||||||
|
shape->push_back(std::get<1>(result)->GetTensor()->shape());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TensorLoader *DebugServices::get_tensor_loader() const { return tensor_loader_; }
|
||||||
|
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,95 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
|
||||||
|
#define MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <tuple>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <mutex>
|
||||||
|
#include "debug/tensor_load.h"
|
||||||
|
#include "debug/tensor_data.h"
|
||||||
|
#include "ir/dtype.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
class DebugServices {
|
||||||
|
public:
|
||||||
|
DebugServices();
|
||||||
|
|
||||||
|
DebugServices(const DebugServices &other);
|
||||||
|
|
||||||
|
DebugServices &operator=(const DebugServices &other);
|
||||||
|
|
||||||
|
~DebugServices();
|
||||||
|
|
||||||
|
void add_watchpoint(unsigned int id, unsigned int watch_condition,
|
||||||
|
const std::vector<std::tuple<std::string, bool>> &check_node_list);
|
||||||
|
|
||||||
|
void remove_watchpoint(unsigned int id);
|
||||||
|
|
||||||
|
void check_watchpoints(std::vector<std::string> *name, std::vector<std::string> *slot, std::vector<char *> *data_ptr,
|
||||||
|
std::vector<unsigned int> *data_size, std::vector<int> *condition,
|
||||||
|
std::vector<unsigned int> *wacthpoint_id);
|
||||||
|
|
||||||
|
void read_nodes_tensors(std::vector<std::string> name, std::vector<std::string> *ret_name,
|
||||||
|
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
|
||||||
|
std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape);
|
||||||
|
|
||||||
|
TensorLoader *get_tensor_loader() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
typedef struct condition_no_param {
|
||||||
|
bool enabled = false;
|
||||||
|
} condition_no_param_t;
|
||||||
|
|
||||||
|
typedef struct condition_with_param {
|
||||||
|
bool enabled = false;
|
||||||
|
float parameter = 0;
|
||||||
|
} condition_with_param_t;
|
||||||
|
|
||||||
|
typedef struct conditions {
|
||||||
|
condition_no_param_t inf;
|
||||||
|
condition_no_param_t neg_inf;
|
||||||
|
condition_no_param_t nan;
|
||||||
|
condition_with_param_t max_below;
|
||||||
|
condition_with_param_t max_above;
|
||||||
|
condition_with_param_t min_below;
|
||||||
|
condition_with_param_t min_above;
|
||||||
|
condition_with_param_t max_minus_min_below;
|
||||||
|
condition_with_param_t max_minus_min_above;
|
||||||
|
condition_with_param_t mean_below;
|
||||||
|
condition_with_param_t mean_above;
|
||||||
|
condition_with_param_t std_dev_below;
|
||||||
|
condition_with_param_t std_dev_above;
|
||||||
|
} conditions_t;
|
||||||
|
|
||||||
|
typedef struct watchpoint {
|
||||||
|
unsigned int id;
|
||||||
|
conditions_t conditions;
|
||||||
|
std::vector<std::tuple<std::string, bool>> check_node_list;
|
||||||
|
} watchpoint_t;
|
||||||
|
|
||||||
|
std::mutex lock_;
|
||||||
|
|
||||||
|
std::unordered_map<unsigned int, watchpoint_t> watchpoint_table;
|
||||||
|
|
||||||
|
TensorLoader *tensor_loader_;
|
||||||
|
};
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_DEBUG_DEBUG_SERVICES_H_
|
|
@ -0,0 +1,316 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package debugger;
|
||||||
|
|
||||||
|
// Versioning
|
||||||
|
enum Version {
|
||||||
|
// unknown version
|
||||||
|
UNKNOWWN_VERSION = 0;
|
||||||
|
|
||||||
|
// Initial version (IR VERSION 1), published on Sep 23, 2019
|
||||||
|
IR_VERSION = 0x0000000000000001;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Data type definition
|
||||||
|
enum DataType {
|
||||||
|
DT_UNDEFINED = 0;
|
||||||
|
// Basic types.
|
||||||
|
DT_BOOL = 1; // bool
|
||||||
|
|
||||||
|
DT_INT8 = 2; // int8_t
|
||||||
|
DT_INT16 = 3; // int16_t
|
||||||
|
DT_INT32 = 4; // int32_t
|
||||||
|
DT_INT64 = 5; // int64_t
|
||||||
|
|
||||||
|
DT_UINT8 = 6; // uint8_t
|
||||||
|
DT_UINT16 = 7; // uint16_t
|
||||||
|
DT_UINT32 = 8; // uint32_t
|
||||||
|
DT_UINT64 = 9; // uint64_t
|
||||||
|
|
||||||
|
DT_FLOAT16 = 10; // float 16
|
||||||
|
DT_FLOAT32 = 11; // float 32
|
||||||
|
DT_FLOAT64 = 12; // float 64
|
||||||
|
|
||||||
|
DT_STRING = 13; // string
|
||||||
|
DT_TENSOR = 14; // tensor
|
||||||
|
DT_GRAPH = 15; // graph
|
||||||
|
|
||||||
|
// list type
|
||||||
|
DT_BOOLS = 16; // list of bool
|
||||||
|
|
||||||
|
DT_INTS8 = 17; // list of int8_t
|
||||||
|
DT_INTS16 = 18; // list of int16_t
|
||||||
|
DT_INTS32 = 19; // list of int32_t
|
||||||
|
DT_INTS64 = 20; // list of int64_t
|
||||||
|
|
||||||
|
DT_UINTS8 = 21; // list of uint8_t
|
||||||
|
DT_UINTS16 = 22; // list of uint16_t
|
||||||
|
DT_UINTS32 = 23; // list of uint32_t
|
||||||
|
DT_UINTS64 = 24; // list of uint64_t
|
||||||
|
|
||||||
|
DT_FLOATS16 = 25; // list of float16
|
||||||
|
DT_FLOATS32 = 26; // list of float32
|
||||||
|
DT_FLOATS64 = 27; // list of float64
|
||||||
|
|
||||||
|
DT_STRINGS = 28; // list of string
|
||||||
|
DT_TENSORS = 29; // list of tensor
|
||||||
|
DT_GRAPHS = 30; // list of graph
|
||||||
|
|
||||||
|
DT_TUPLE = 31; // tuple
|
||||||
|
DT_LIST = 32; // list
|
||||||
|
DT_DICT = 33; // dictionary
|
||||||
|
|
||||||
|
// other types
|
||||||
|
DT_NONE = 34; // None
|
||||||
|
DT_SYM_INST = 35; // Symbolic Key Instance
|
||||||
|
|
||||||
|
// type related type
|
||||||
|
DT_BASE_INT = 36; // type generic int
|
||||||
|
DT_BASE_UINT = 37; // type generate unsigned int
|
||||||
|
DT_BASE_FLOAT = 38; // type generate float
|
||||||
|
DT_TYPE = 39; // type type
|
||||||
|
DT_ANYTHING = 40; // type anything
|
||||||
|
DT_REFKEY = 41; // type refkey
|
||||||
|
DT_REF = 42; // type ref
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value definition for attribute value or parameter default value
|
||||||
|
message ValueProto {
|
||||||
|
// data type of value
|
||||||
|
optional DataType dtype = 1; // discriminator that indicates which field below is in use
|
||||||
|
|
||||||
|
// Exactly ONE of the following fields must be present for this version of the IR
|
||||||
|
optional bool bool_val = 2; // bool
|
||||||
|
optional int64 int_val = 3; // int
|
||||||
|
optional uint64 uint_val = 4; // uint
|
||||||
|
optional float float_val = 5; // float
|
||||||
|
optional double double_val = 6; // double
|
||||||
|
optional string str_val = 7; // string
|
||||||
|
optional TensorProto tensor_val = 8; // tensor value
|
||||||
|
optional GraphProto graph = 9; // graph
|
||||||
|
|
||||||
|
repeated bool bool_vals = 10; // list of bool
|
||||||
|
repeated int64 int_vals = 11; // list of int
|
||||||
|
repeated uint64 uint_vals = 12; // list of uint
|
||||||
|
repeated float float_vals = 13; // list of float
|
||||||
|
repeated double double_vals = 14; // list of double
|
||||||
|
repeated string str_vals = 15; // list of string
|
||||||
|
repeated TensorProto tensor_vals = 16; // list of tensor value
|
||||||
|
repeated GraphProto graphs = 17; // list of graph
|
||||||
|
|
||||||
|
// tuple or list
|
||||||
|
repeated ValueProto values = 18; // tuple, list of value
|
||||||
|
|
||||||
|
// dictionary
|
||||||
|
repeated NamedValueProto dict_val = 19; // dictionary info
|
||||||
|
|
||||||
|
// filed for type type
|
||||||
|
optional TypeProto type_val = 20; // type type info
|
||||||
|
}
|
||||||
|
|
||||||
|
message AttributeProto {
|
||||||
|
optional string name = 1; // attribute name
|
||||||
|
optional ValueProto value = 2; // attribute value
|
||||||
|
}
|
||||||
|
|
||||||
|
message NamedValueProto {
|
||||||
|
optional string key = 1; // attribute name
|
||||||
|
optional ValueProto value = 2; // attribute value
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines a tensor shape.
|
||||||
|
message TensorShapeProto {
|
||||||
|
// One dimension of the tensor.
|
||||||
|
message Dimension {
|
||||||
|
// Size of the tensor in that dimension.
|
||||||
|
// This value must be >= -1, but values of -1 are reserved for "unknown"
|
||||||
|
// shapes (values of -1 mean "unknown" dimension).
|
||||||
|
optional int64 size = 1;
|
||||||
|
|
||||||
|
// Optional name of the tensor dimension.
|
||||||
|
optional string name = 2;
|
||||||
|
};
|
||||||
|
|
||||||
|
repeated Dimension dim = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Types for graph input(parameter) and output
|
||||||
|
message TypeProto {
|
||||||
|
|
||||||
|
message Tensor {
|
||||||
|
// This field MUST have a valid DataType value except DT_TENSOR
|
||||||
|
optional DataType elem_type = 1;
|
||||||
|
optional TensorShapeProto shape = 2; // for scalar, this field is not set
|
||||||
|
}
|
||||||
|
|
||||||
|
// tuple type
|
||||||
|
message Sequence {
|
||||||
|
// The type and optional shape of elements of the tuple.
|
||||||
|
repeated TypeProto elem_types = 1;
|
||||||
|
};
|
||||||
|
|
||||||
|
// data type
|
||||||
|
optional DataType data_type = 1;
|
||||||
|
|
||||||
|
oneof value {
|
||||||
|
// The type of a tensor.
|
||||||
|
Tensor tensor_type = 2;
|
||||||
|
|
||||||
|
// The type of a tuple.
|
||||||
|
Sequence sequence_type = 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines information on graph parameters, including the name, the type, and
|
||||||
|
// the default value of parameter if exists.
|
||||||
|
message ParameterProto {
|
||||||
|
optional string name = 1; // parameter name
|
||||||
|
optional TypeProto type = 2; // parameter type
|
||||||
|
optional ValueProto default_val = 3; // default value of parameter if exists
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines graph output information
|
||||||
|
message OutputProto {
|
||||||
|
optional string name = 1; // output node name
|
||||||
|
optional TypeProto type = 2; // output node type
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define node input information
|
||||||
|
message InputProto {
|
||||||
|
enum EdgeType {
|
||||||
|
DATA_EDGE = 0; // data edge
|
||||||
|
CONTROL_EDGE = 1; // control edge
|
||||||
|
}
|
||||||
|
|
||||||
|
optional string name = 1;
|
||||||
|
optional EdgeType type = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nodes
|
||||||
|
//
|
||||||
|
// Computation graphs are made up of a DAG of nodes, which represent what is
|
||||||
|
// commonly called a "layer" or "pipeline stage" in machine learning frameworks.
|
||||||
|
//
|
||||||
|
// For example, it can be a node of type "Conv" that takes in an image, a filter
|
||||||
|
// tensor and a bias tensor, and produces the convolved output.
|
||||||
|
message NodeProto {
|
||||||
|
repeated InputProto input = 1; // namespace Value
|
||||||
|
optional string name = 2; // namespace Value
|
||||||
|
|
||||||
|
// The symbolic identifier of the Operator to execute.
|
||||||
|
optional string op_type = 3; // namespace Operator
|
||||||
|
// The domain of the OperatorSet that specifies the operator named by op_type.
|
||||||
|
optional string scope = 4; // namespace Domain
|
||||||
|
|
||||||
|
// Additional named attributes.
|
||||||
|
repeated AttributeProto attribute = 5;
|
||||||
|
|
||||||
|
// Optional type info of this node
|
||||||
|
optional TypeProto output_type = 6;
|
||||||
|
|
||||||
|
// other fields for debug
|
||||||
|
optional uint64 output_i = 7;
|
||||||
|
|
||||||
|
// for debugger, full name with scope
|
||||||
|
optional string debug_name = 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Models
|
||||||
|
//
|
||||||
|
// ModelProto is a top-level file/container format for bundling a ML model and
|
||||||
|
// associating its computation graph with metadata.
|
||||||
|
//
|
||||||
|
// The semantics of the model are described by the associated GraphProto.
|
||||||
|
message ModelProto {
|
||||||
|
// ir version
|
||||||
|
optional int64 ir_version = 1;
|
||||||
|
|
||||||
|
// Domain name of the model.
|
||||||
|
// We use reverse domain names as name space indicators. For example:
|
||||||
|
// `com.facebook.fair` or `com.microsoft.cognitiveservices`
|
||||||
|
//
|
||||||
|
// Together with `model_version` and GraphProto.name, this forms the unique identity of
|
||||||
|
// the graph.
|
||||||
|
optional string domain = 2;
|
||||||
|
|
||||||
|
// The version of the graph encoded. See Version enum below.
|
||||||
|
optional int64 model_version = 3;
|
||||||
|
|
||||||
|
// The parameterized graph that is evaluated to execute the model.
|
||||||
|
optional GraphProto graph = 4;
|
||||||
|
|
||||||
|
// metadata info of opeartors
|
||||||
|
optional OperatorSetProto metadata_operators = 5;
|
||||||
|
};
|
||||||
|
|
||||||
|
message OperatorProto {
|
||||||
|
optional string name = 1; // used as key, must be distinct
|
||||||
|
optional bytes config = 2; // operator config info
|
||||||
|
optional bytes obj_info = 3; // operator related object info, e.g. content of operator binary or name
|
||||||
|
};
|
||||||
|
|
||||||
|
message OperatorSetProto {
|
||||||
|
repeated OperatorProto operators = 1;
|
||||||
|
optional string summary = 2; // summary info of operators, e.g. file position of operators file
|
||||||
|
}
|
||||||
|
|
||||||
|
// Graphs
|
||||||
|
//
|
||||||
|
// A graph defines the computational logic of a model and is comprised of a parameterized
|
||||||
|
// list of nodes that form a directed acyclic graph based on their inputs and outputs.
|
||||||
|
// This is the equivalent of the "network" or "graph" in many deep learning
|
||||||
|
// frameworks.
|
||||||
|
message GraphProto {
|
||||||
|
// The nodes in the graph, sorted topologically.
|
||||||
|
repeated NodeProto node = 1;
|
||||||
|
|
||||||
|
// The name of the graph.
|
||||||
|
optional string name = 2; // namespace Graph
|
||||||
|
|
||||||
|
// The parameters(inputs) and outputs of the graph.
|
||||||
|
repeated ParameterProto parameters = 3;
|
||||||
|
repeated OutputProto outputs = 4;
|
||||||
|
|
||||||
|
// Constants used in this graph
|
||||||
|
repeated NamedValueProto const_vals = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tensors
|
||||||
|
//
|
||||||
|
// A serialized tensor value.
|
||||||
|
message TensorProto {
|
||||||
|
// The node name of the tensor.
|
||||||
|
optional string node_name = 1;
|
||||||
|
|
||||||
|
// The slot of the tensor in its node.
|
||||||
|
optional string slot = 2;
|
||||||
|
|
||||||
|
// The serialized tensor content.
|
||||||
|
optional bytes tensor_content = 3;
|
||||||
|
|
||||||
|
// The shape of the tensor.
|
||||||
|
repeated int64 dims = 4;
|
||||||
|
|
||||||
|
// The data type of the tensor.
|
||||||
|
// This field MUST have a valid DataType value except DT_TENSOR
|
||||||
|
optional DataType data_type = 5;
|
||||||
|
|
||||||
|
// If the tensor content transferring is finished.
|
||||||
|
optional bool finished = 6;
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package debugger;
|
||||||
|
|
||||||
|
import "debug_graph.proto";
|
||||||
|
|
||||||
|
service EventListener {
|
||||||
|
rpc WaitCMD (Metadata) returns (EventReply) {};
|
||||||
|
rpc SendMetadata (Metadata) returns (EventReply) {};
|
||||||
|
rpc SendGraph (GraphProto) returns (EventReply) {};
|
||||||
|
rpc SendTensors (stream TensorProto) returns (EventReply) {};
|
||||||
|
rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {};
|
||||||
|
}
|
||||||
|
|
||||||
|
message Metadata {
|
||||||
|
string device_name = 1;
|
||||||
|
int32 cur_step = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message EventReply {
|
||||||
|
enum Status {
|
||||||
|
OK = 0;
|
||||||
|
FAILED = 1;
|
||||||
|
PENDING = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status status = 1;
|
||||||
|
|
||||||
|
oneof cmd {
|
||||||
|
bool exit = 2;
|
||||||
|
int32 run_cmd = 3;
|
||||||
|
SetCMD set_cmd = 4;
|
||||||
|
ViewCMD view_cmd = 5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
message SetCMD {
|
||||||
|
repeated WatchNode watch_nodes = 1;
|
||||||
|
WatchCondition watch_condition = 2;
|
||||||
|
bool delete = 3;
|
||||||
|
int32 id = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message ViewCMD {
|
||||||
|
repeated TensorProto tensors = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message WatchCondition {
|
||||||
|
enum Condition {
|
||||||
|
nan = 0;
|
||||||
|
inf = 1;
|
||||||
|
}
|
||||||
|
Condition condition = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message WatchNode {
|
||||||
|
string node_name = 1;
|
||||||
|
string node_type = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message WatchpointHit {
|
||||||
|
TensorProto tensor = 1;
|
||||||
|
WatchCondition watch_condition = 2;
|
||||||
|
int32 id = 3;
|
||||||
|
}
|
|
@ -0,0 +1,488 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "debug/debugger/debugger.h"
|
||||||
|
#include "pipeline/pipeline.h"
|
||||||
|
#include "session/anf_runtime_algorithm.h"
|
||||||
|
|
||||||
|
using debugger::EventReply;
|
||||||
|
using debugger::GraphProto;
|
||||||
|
using debugger::ModelProto;
|
||||||
|
using debugger::TensorProto;
|
||||||
|
using debugger::WatchCondition;
|
||||||
|
using debugger::WatchCondition_Condition_inf;
|
||||||
|
using debugger::WatchCondition_Condition_nan;
|
||||||
|
using debugger::WatchNode;
|
||||||
|
using debugger::WatchpointHit;
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
|
||||||
|
DebuggerPtr Debugger::debugger_ = nullptr;
|
||||||
|
std::mutex Debugger::instance_lock_;
|
||||||
|
|
||||||
|
Debugger::Debugger()
|
||||||
|
: grpc_client_(nullptr),
|
||||||
|
debug_services_(nullptr),
|
||||||
|
device_id_(0),
|
||||||
|
num_step_(0),
|
||||||
|
debugger_enabled_(false),
|
||||||
|
is_dataset_graph_(false) {}
|
||||||
|
|
||||||
|
void Debugger::Init(const uint32_t device_id) {
|
||||||
|
// access lock for public method
|
||||||
|
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||||
|
// save device_id
|
||||||
|
MS_LOG(INFO) << "Debugger got device_id: " << device_id;
|
||||||
|
device_id_ = device_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::EnableDebugger() {
|
||||||
|
// reset some of the class members
|
||||||
|
num_step_ = 0;
|
||||||
|
debugger_enabled_ = false;
|
||||||
|
grpc_client_ = nullptr;
|
||||||
|
debug_services_ = nullptr;
|
||||||
|
|
||||||
|
// get env variables to configure debugger
|
||||||
|
const char *env_enable_str = std::getenv("ENABLE_MS_DEBUGGER");
|
||||||
|
if (env_enable_str != nullptr) {
|
||||||
|
MS_LOG(INFO) << "Getenv ENABLE_MS_DEBUGGER: " << env_enable_str;
|
||||||
|
if (std::strcmp(env_enable_str, "1") == 0) {
|
||||||
|
debugger_enabled_ = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!debugger_enabled_) {
|
||||||
|
MS_LOG(WARNING) << "Not enabling debugger. Set environment variable ENABLE_MS_DEBUGGER=1 to enable debugger.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// configure host
|
||||||
|
const char *env_host_str = std::getenv("MS_DEBUGGER_HOST");
|
||||||
|
std::string host;
|
||||||
|
if (env_host_str != nullptr) {
|
||||||
|
MS_LOG(INFO) << "Getenv MS_DEBUGGER_HOST: " << env_host_str;
|
||||||
|
host = std::string(env_host_str);
|
||||||
|
} else {
|
||||||
|
MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost";
|
||||||
|
host = "localhost";
|
||||||
|
}
|
||||||
|
// configure port
|
||||||
|
const char *env_port_str = std::getenv("MS_DEBUGGER_PORT");
|
||||||
|
std::string port;
|
||||||
|
if (env_port_str != nullptr) {
|
||||||
|
MS_LOG(INFO) << "Getenv MS_DEBUGGER_PORT: " << env_port_str;
|
||||||
|
port = std::string(env_port_str);
|
||||||
|
} else {
|
||||||
|
MS_LOG(WARNING) << "Environment variable MS_DEBUGGER_PORT doesn't exist. Using default debugger port: 50051";
|
||||||
|
port = "50051";
|
||||||
|
}
|
||||||
|
|
||||||
|
// initialize grpc client
|
||||||
|
grpc_client_ = std::make_unique<GrpcClient>(host, port);
|
||||||
|
debug_services_ = std::make_unique<DebugServices>();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::Reset() {
|
||||||
|
// access lock for public method
|
||||||
|
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||||
|
// reset components
|
||||||
|
device_id_ = 0;
|
||||||
|
num_step_ = 0;
|
||||||
|
debugger_enabled_ = false;
|
||||||
|
is_dataset_graph_ = false;
|
||||||
|
graph_ptr_ = nullptr;
|
||||||
|
grpc_client_ = nullptr;
|
||||||
|
debug_services_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
|
||||||
|
// access lock for public method
|
||||||
|
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||||
|
// check and save graph_ptr, suspend if graph is new
|
||||||
|
CheckGraphPtr(graph_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::PostExecute() {
|
||||||
|
// access lock for public method
|
||||||
|
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||||
|
// analyze tensor data and send the watchpoints been hit
|
||||||
|
if (debugger_enabled_ && !is_dataset_graph_) {
|
||||||
|
num_step_++;
|
||||||
|
MS_LOG(INFO) << "Debugger suspend at end of step; number of steps executed: " << num_step_;
|
||||||
|
SendWatchpointsAndSuspend(CheckWatchpoints());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::PostDebugOp() {
|
||||||
|
// access lock for public method
|
||||||
|
std::lock_guard<std::mutex> a_lock(access_lock_);
|
||||||
|
// suspend if debugger is enabled
|
||||||
|
if (debugger_enabled_ && !is_dataset_graph_) {
|
||||||
|
MS_LOG(INFO) << "Debugger suspend at debug_op";
|
||||||
|
CommandLoop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
|
||||||
|
if (graph_ptr_ != graph_ptr) {
|
||||||
|
MS_LOG(INFO) << "Debugger got new graph: " << graph_ptr->graph_id();
|
||||||
|
// save new graph_ptr
|
||||||
|
graph_ptr_ = graph_ptr;
|
||||||
|
// check if it is dataset graph
|
||||||
|
CheckDatasetGraph();
|
||||||
|
if (!is_dataset_graph_) {
|
||||||
|
// only try to enable debugger if it is not a dataset graph
|
||||||
|
EnableDebugger();
|
||||||
|
if (debugger_enabled_) {
|
||||||
|
// get graph proto and send to mindinsight
|
||||||
|
SendGraphAndSuspend(GetGraphProto());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::CheckDatasetGraph() {
|
||||||
|
// print parameter node names
|
||||||
|
const auto ¶ms = graph_ptr_->inputs();
|
||||||
|
for (const auto ¶m : params) {
|
||||||
|
MS_LOG(INFO) << "param: " << param->fullname_with_scope();
|
||||||
|
}
|
||||||
|
// check if there is GetNext or InitDataSetQueue node
|
||||||
|
const auto &nodes = graph_ptr_->execution_order();
|
||||||
|
for (const auto &node : nodes) {
|
||||||
|
auto node_name = AnfAlgo::GetCNodeName(node);
|
||||||
|
MS_LOG(INFO) << "node: " << node->fullname_with_scope();
|
||||||
|
if (node_name == "GetNext" || node_name == "InitDataSetQueue") {
|
||||||
|
MS_LOG(WARNING) << "Not enabling debugger for graph " << graph_ptr_->graph_id() << ": found dataset graph node "
|
||||||
|
<< node_name;
|
||||||
|
is_dataset_graph_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
is_dataset_graph_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
GraphProto Debugger::GetGraphProto() {
|
||||||
|
// convert kernel graph to debugger modelproto
|
||||||
|
ModelProto model = GetDebuggerFuncGraphProto(graph_ptr_);
|
||||||
|
return model.graph();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
|
||||||
|
// prepare metadata
|
||||||
|
std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
|
||||||
|
Metadata metadata;
|
||||||
|
metadata.set_device_name(device_name);
|
||||||
|
metadata.set_cur_step(num_step_);
|
||||||
|
EventReply reply_metadata = grpc_client_->SendMetadata(metadata);
|
||||||
|
if (reply_metadata.status() != reply_metadata.OK) {
|
||||||
|
MS_LOG(ERROR) << "Error: SendMetadata failed";
|
||||||
|
}
|
||||||
|
// send graph to mindinght server
|
||||||
|
EventReply reply = grpc_client_->SendGraph(graph_proto);
|
||||||
|
if (reply.status() != reply.OK) {
|
||||||
|
MS_LOG(ERROR) << "Error: SendGraph failed";
|
||||||
|
}
|
||||||
|
// enter command loop, wait and process commands
|
||||||
|
CommandLoop();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::CommandLoop() {
|
||||||
|
// prepare metadata
|
||||||
|
std::string device_name = std::to_string(device_id_) + ":" + std::to_string(graph_ptr_->graph_id());
|
||||||
|
Metadata metadata;
|
||||||
|
metadata.set_device_name(device_name);
|
||||||
|
metadata.set_cur_step(num_step_);
|
||||||
|
|
||||||
|
// loop exit flag
|
||||||
|
bool run = false;
|
||||||
|
int num_wait_fail = 0;
|
||||||
|
const int max_num_wait_fail = 5;
|
||||||
|
|
||||||
|
while (!run) {
|
||||||
|
// wait for command
|
||||||
|
EventReply reply = grpc_client_->WaitForCommand(metadata);
|
||||||
|
if (reply.status() != reply.OK) {
|
||||||
|
MS_LOG(ERROR) << "Error: WaitForCommand failed";
|
||||||
|
num_wait_fail++;
|
||||||
|
if (num_wait_fail > max_num_wait_fail) {
|
||||||
|
MS_LOG(ERROR) << "Maximum number of WaitForCommand retry reached: exiting training session";
|
||||||
|
Exit();
|
||||||
|
}
|
||||||
|
MS_LOG(ERROR) << "Number of consecutive WaitForCommand fail:" << num_wait_fail << "; Retry after "
|
||||||
|
<< num_wait_fail << "s";
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(1000 * num_wait_fail));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get type of the command in reply
|
||||||
|
DebuggerCommand cmd = GetCommand(reply);
|
||||||
|
if (cmd == DebuggerCommand::kUnknownCMD) {
|
||||||
|
MS_LOG(ERROR) << "Error: debugger recieved unknown command";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
MS_LOG(INFO) << "recieved command: ";
|
||||||
|
switch (cmd) {
|
||||||
|
case DebuggerCommand::kUnknownCMD:
|
||||||
|
MS_LOG(INFO) << "UnknownCMD";
|
||||||
|
break;
|
||||||
|
case DebuggerCommand::kExitCMD:
|
||||||
|
MS_LOG(INFO) << "ExitCMD";
|
||||||
|
Exit();
|
||||||
|
break;
|
||||||
|
case DebuggerCommand::kRunCMD:
|
||||||
|
MS_LOG(INFO) << "RunCMD";
|
||||||
|
// exit loop
|
||||||
|
run = true;
|
||||||
|
break;
|
||||||
|
case DebuggerCommand::kSetCMD:
|
||||||
|
MS_LOG(INFO) << "SetCMD";
|
||||||
|
{
|
||||||
|
// print set cmd content
|
||||||
|
ProtoVector<WatchNode> recieved_nodes = GetWatchnodes(reply);
|
||||||
|
for (auto node : recieved_nodes) {
|
||||||
|
MS_LOG(INFO) << "node name: " << node.node_name();
|
||||||
|
MS_LOG(INFO) << "node type: " << node.node_type();
|
||||||
|
}
|
||||||
|
WatchCondition recieved_condition = GetWatchcondition(reply);
|
||||||
|
MS_LOG(INFO) << "condition: " << recieved_condition.condition();
|
||||||
|
int32_t id = GetWatchpointID(reply);
|
||||||
|
MS_LOG(INFO) << "id: " << id;
|
||||||
|
bool delete_ = GetWatchpointDelete(reply);
|
||||||
|
MS_LOG(INFO) << "delete: " << delete_;
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << "Setting watchpoint";
|
||||||
|
if (GetWatchpointDelete(reply)) {
|
||||||
|
RemoveWatchpoint(GetWatchpointID(reply));
|
||||||
|
} else {
|
||||||
|
SetWatchpoint(GetWatchnodes(reply), GetWatchcondition(reply), GetWatchpointID(reply));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DebuggerCommand::kViewCMD:
|
||||||
|
MS_LOG(INFO) << "ViewCMD";
|
||||||
|
{
|
||||||
|
// print view cmd content
|
||||||
|
ProtoVector<TensorProto> received_tensors = GetTensors(reply);
|
||||||
|
for (auto tensor : received_tensors) {
|
||||||
|
MS_LOG(INFO) << "tensor node name: " << tensor.node_name();
|
||||||
|
MS_LOG(INFO) << "tensor slot: " << tensor.slot();
|
||||||
|
MS_LOG(INFO) << "tensor finished: " << std::boolalpha << tensor.finished() << std::noboolalpha;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << "Sending tensors";
|
||||||
|
std::list<TensorProto> tensors = LoadTensors(GetTensors(reply));
|
||||||
|
{
|
||||||
|
for (auto tensor : tensors) {
|
||||||
|
MS_LOG(INFO) << "tensor node name: " << tensor.node_name();
|
||||||
|
MS_LOG(INFO) << "tensor slot: " << tensor.slot();
|
||||||
|
MS_LOG(INFO) << "tensor finished: " << std::boolalpha << tensor.finished() << std::noboolalpha;
|
||||||
|
MS_LOG(INFO) << "tensor dims: ";
|
||||||
|
for (auto dim : tensor.dims()) {
|
||||||
|
MS_LOG(INFO) << dim << ",";
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << "tensor dtype: " << tensor.data_type();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EventReply send_tensors_reply = grpc_client_->SendTensors(tensors);
|
||||||
|
if (send_tensors_reply.status() != send_tensors_reply.OK) {
|
||||||
|
MS_LOG(ERROR) << "Error: SendTensors failed";
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DebuggerCommand Debugger::GetCommand(const EventReply &reply) {
|
||||||
|
DebuggerCommand cmd = DebuggerCommand::kUnknownCMD;
|
||||||
|
switch (reply.cmd_case()) {
|
||||||
|
case debugger::EventReply::CmdCase::kExit:
|
||||||
|
cmd = DebuggerCommand::kExitCMD;
|
||||||
|
break;
|
||||||
|
case debugger::EventReply::CmdCase::kRunCmd:
|
||||||
|
cmd = DebuggerCommand::kRunCMD;
|
||||||
|
break;
|
||||||
|
case debugger::EventReply::CmdCase::kSetCmd:
|
||||||
|
cmd = DebuggerCommand::kSetCMD;
|
||||||
|
break;
|
||||||
|
case debugger::EventReply::CmdCase::kViewCmd:
|
||||||
|
cmd = DebuggerCommand::kViewCMD;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
MS_LOG(ERROR) << "Error: UnknownCMD";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return cmd;
|
||||||
|
}
|
||||||
|
|
||||||
|
ProtoVector<WatchNode> Debugger::GetWatchnodes(const EventReply &reply) {
|
||||||
|
if (!reply.has_set_cmd()) {
|
||||||
|
MS_LOG(ERROR) << "Error: Not SetCMD, can not get WatchNodes. Returning default value: ProtoVector<WatchNode>().";
|
||||||
|
return ProtoVector<WatchNode>();
|
||||||
|
}
|
||||||
|
return reply.set_cmd().watch_nodes();
|
||||||
|
}
|
||||||
|
|
||||||
|
WatchCondition Debugger::GetWatchcondition(const EventReply &reply) {
|
||||||
|
if (!reply.has_set_cmd() || !reply.set_cmd().has_watch_condition()) {
|
||||||
|
MS_LOG(ERROR) << "Error: Can not get WatchCondition from command. Returning default value: WatchCondition().";
|
||||||
|
return WatchCondition();
|
||||||
|
}
|
||||||
|
return reply.set_cmd().watch_condition();
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t Debugger::GetWatchpointID(const EventReply &reply) {
|
||||||
|
if (!reply.has_set_cmd()) {
|
||||||
|
MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint ID. Returning default value: 0.";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return reply.set_cmd().id();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Debugger::GetWatchpointDelete(const EventReply &reply) {
|
||||||
|
if (!reply.has_set_cmd()) {
|
||||||
|
MS_LOG(ERROR) << "Error: Not SetCMD, can not get Watchpoint delete flag. Returning default value: false.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return reply.set_cmd().delete_();
|
||||||
|
}
|
||||||
|
|
||||||
|
ProtoVector<TensorProto> Debugger::GetTensors(const EventReply &reply) {
|
||||||
|
if (!reply.has_view_cmd()) {
|
||||||
|
MS_LOG(ERROR) << "Error: Not ViewCMD, can not get Tensors. Returning default value: ProtoVector<TensorProto>().";
|
||||||
|
return ProtoVector<TensorProto>();
|
||||||
|
}
|
||||||
|
return reply.view_cmd().tensors();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCondition &condition, const int32_t id) {
|
||||||
|
std::vector<std::tuple<std::string, bool>> check_node_list;
|
||||||
|
std::transform(nodes.begin(), nodes.end(), std::back_inserter(check_node_list),
|
||||||
|
[](WatchNode node) -> std::tuple<std::string, bool> {
|
||||||
|
return make_tuple(node.node_name(), node.node_type() == "scope");
|
||||||
|
});
|
||||||
|
|
||||||
|
debug_services_->add_watchpoint(id, condition.condition(), check_node_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::RemoveWatchpoint(const int32_t id) { debug_services_->remove_watchpoint(id); }
|
||||||
|
|
||||||
|
std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &tensors) {
|
||||||
|
std::vector<std::string> name;
|
||||||
|
std::vector<std::string> ret_name;
|
||||||
|
std::vector<char *> data_ptr;
|
||||||
|
std::vector<unsigned int> data_size;
|
||||||
|
std::vector<TypePtr> dtype;
|
||||||
|
std::vector<std::vector<int>> shape;
|
||||||
|
|
||||||
|
std::transform(tensors.begin(), tensors.end(), std::back_inserter(name),
|
||||||
|
[](TensorProto tensor) -> std::string { return tensor.node_name() + ":" + tensor.slot(); });
|
||||||
|
|
||||||
|
debug_services_->read_nodes_tensors(name, &ret_name, &data_ptr, &data_size, &dtype, &shape);
|
||||||
|
|
||||||
|
std::list<TensorProto> tensor_list;
|
||||||
|
unsigned int result_index = 0;
|
||||||
|
TensorProto tensor_item;
|
||||||
|
|
||||||
|
for (auto tensor : tensors) {
|
||||||
|
tensor_item.set_node_name(tensor.node_name());
|
||||||
|
tensor_item.set_slot(tensor.slot());
|
||||||
|
tensor_item.set_finished(true);
|
||||||
|
|
||||||
|
// return empty tensor if didn't find the requested tensor
|
||||||
|
if (result_index >= ret_name.size() || ret_name[result_index] != tensor.node_name() + ":" + tensor.slot()) {
|
||||||
|
tensor_list.push_back(tensor_item);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
tensor_item.set_tensor_content(data_ptr[result_index], data_size[result_index]);
|
||||||
|
tensor_item.set_data_type(GetDebuggerNumberDataType(dtype[result_index]));
|
||||||
|
tensor_item.clear_dims();
|
||||||
|
for (auto &elem : shape[result_index]) {
|
||||||
|
tensor_item.add_dims(elem);
|
||||||
|
}
|
||||||
|
|
||||||
|
tensor_list.push_back(tensor_item);
|
||||||
|
|
||||||
|
result_index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return tensor_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::Exit() {
|
||||||
|
// clear resource before exit
|
||||||
|
pipeline::ClearResAtexit();
|
||||||
|
std::exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::list<WatchpointHit> Debugger::CheckWatchpoints() {
|
||||||
|
std::vector<std::string> name;
|
||||||
|
std::vector<std::string> slot;
|
||||||
|
std::vector<char *> data_ptr;
|
||||||
|
std::vector<unsigned int> data_size;
|
||||||
|
std::vector<int> condition;
|
||||||
|
std::vector<unsigned int> watchpoint_id;
|
||||||
|
|
||||||
|
debug_services_->check_watchpoints(&name, &slot, &data_ptr, &data_size, &condition, &watchpoint_id);
|
||||||
|
|
||||||
|
std::list<WatchpointHit> points;
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < name.size(); i++) {
|
||||||
|
TensorProto *tensor_item;
|
||||||
|
tensor_item = new TensorProto();
|
||||||
|
tensor_item->set_node_name(name[i]);
|
||||||
|
tensor_item->set_slot(slot[i]);
|
||||||
|
tensor_item->set_tensor_content(data_ptr[i], data_size[i]);
|
||||||
|
|
||||||
|
// finished in TensorProto will always be true before we implement big tensor splitting
|
||||||
|
tensor_item->set_finished(true);
|
||||||
|
|
||||||
|
WatchCondition *condition_item;
|
||||||
|
condition_item = new WatchCondition();
|
||||||
|
condition_item->set_condition(debugger::WatchCondition_Condition(condition[i]));
|
||||||
|
|
||||||
|
WatchpointHit point;
|
||||||
|
point.set_allocated_tensor(tensor_item);
|
||||||
|
point.set_allocated_watch_condition(condition_item);
|
||||||
|
point.set_id(watchpoint_id[i]);
|
||||||
|
|
||||||
|
points.push_back(point);
|
||||||
|
}
|
||||||
|
|
||||||
|
return points;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Debugger::SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points) {
|
||||||
|
// send info about watchpoint
|
||||||
|
if (!points.empty()) {
|
||||||
|
EventReply reply = grpc_client_->SendWatchpointHits(points);
|
||||||
|
if (reply.status() != reply.OK) {
|
||||||
|
MS_LOG(ERROR) << "Error: SendWatchpointHits failed";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// enter command loop
|
||||||
|
CommandLoop();
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugServices *Debugger::get_debug_services() { return debug_services_.get(); }
|
||||||
|
|
||||||
|
bool Debugger::debugger_enabled() { return debugger_enabled_; }
|
||||||
|
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,159 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_H_
|
||||||
|
#define MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_H_
|
||||||
|
|
||||||
|
#include <list>
#include <memory>
#include <mutex>
#include <string>

#include "session/kernel_graph.h"
#include "debug/debugger/grpc_client.h"
#include "debug/debug_services.h"
|
||||||
|
|
||||||
|
using debugger::DataType;
|
||||||
|
using debugger::EventReply;
|
||||||
|
using debugger::GraphProto;
|
||||||
|
using debugger::ModelProto;
|
||||||
|
using debugger::TensorProto;
|
||||||
|
using debugger::WatchCondition;
|
||||||
|
using debugger::WatchNode;
|
||||||
|
using debugger::WatchpointHit;
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
using ProtoVector = google::protobuf::RepeatedPtrField<T>;
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
// different types of command recieved by debugger
|
||||||
|
// need to keep sync with client-side proto and server-side proto
|
||||||
|
enum class DebuggerCommand { kExitCMD = 2, kRunCMD = 3, kSetCMD = 4, kViewCMD = 5, kUnknownCMD = -1 };
|
||||||
|
|
||||||
|
class Debugger : public std::enable_shared_from_this<Debugger> {
|
||||||
|
public:
|
||||||
|
static std::shared_ptr<Debugger> GetInstance() {
|
||||||
|
std::lock_guard<std::mutex> i_lock(instance_lock_);
|
||||||
|
if (debugger_ == nullptr) {
|
||||||
|
debugger_ = std::shared_ptr<Debugger>(new (std::nothrow) Debugger());
|
||||||
|
}
|
||||||
|
return debugger_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// deconstructor
|
||||||
|
~Debugger() = default;
|
||||||
|
|
||||||
|
// init
|
||||||
|
// only save device_id
|
||||||
|
void Init(const uint32_t device_id);
|
||||||
|
|
||||||
|
// reset debugger
|
||||||
|
void Reset();
|
||||||
|
|
||||||
|
// enable debugger
|
||||||
|
// send graph and wait for command
|
||||||
|
// do nothing if graph is set already
|
||||||
|
void PreExecute(const KernelGraphPtr &graph_ptr);
|
||||||
|
|
||||||
|
// analyze tensors and wait for command
|
||||||
|
// don't need a graph_ptr because it is saved during pre_execute
|
||||||
|
void PostExecute();
|
||||||
|
|
||||||
|
// suspend the execution after a debug_op
|
||||||
|
void PostDebugOp();
|
||||||
|
|
||||||
|
DebugServices *get_debug_services();
|
||||||
|
|
||||||
|
bool debugger_enabled();
|
||||||
|
|
||||||
|
private:
|
||||||
|
// private constructor for singleton
|
||||||
|
Debugger();
|
||||||
|
|
||||||
|
// enable debugger
|
||||||
|
// instantiate class members
|
||||||
|
// read env variable for grpc client
|
||||||
|
void EnableDebugger();
|
||||||
|
|
||||||
|
// check and save graph pointer
|
||||||
|
void CheckGraphPtr(const KernelGraphPtr &graph_ptr);
|
||||||
|
|
||||||
|
// check if the graph is a dataset graph
|
||||||
|
void CheckDatasetGraph();
|
||||||
|
|
||||||
|
// serialize graph and get proto
|
||||||
|
GraphProto GetGraphProto();
|
||||||
|
|
||||||
|
// send graph and enter command wait loop
|
||||||
|
void SendGraphAndSuspend(const GraphProto &graph_proto);
|
||||||
|
|
||||||
|
// wait for command and process command
|
||||||
|
// send command request and process reply in a loop
|
||||||
|
// break if RunCMD
|
||||||
|
void CommandLoop();
|
||||||
|
|
||||||
|
// process reply and command type
|
||||||
|
DebuggerCommand GetCommand(const EventReply &reply);
|
||||||
|
|
||||||
|
// parse other data out of EventReply
|
||||||
|
ProtoVector<WatchNode> GetWatchnodes(const EventReply &reply);
|
||||||
|
WatchCondition GetWatchcondition(const EventReply &reply);
|
||||||
|
int32_t GetWatchpointID(const EventReply &reply);
|
||||||
|
bool GetWatchpointDelete(const EventReply &reply);
|
||||||
|
ProtoVector<TensorProto> GetTensors(const EventReply &reply);
|
||||||
|
|
||||||
|
// set what nodes and conditions to watch
|
||||||
|
void SetWatchpoint(const ProtoVector<WatchNode> &nodes, const WatchCondition &condition, const int32_t id);
|
||||||
|
|
||||||
|
// remove watchpoint with id
|
||||||
|
void RemoveWatchpoint(const int32_t id);
|
||||||
|
|
||||||
|
// load tensor for view command
|
||||||
|
std::list<TensorProto> LoadTensors(const ProtoVector<TensorProto> &tensors);
|
||||||
|
|
||||||
|
// terminate training process
|
||||||
|
void Exit();
|
||||||
|
|
||||||
|
// analyze tensors and check watchpoint conditions
|
||||||
|
// return names of tensors and what condition they hit
|
||||||
|
std::list<WatchpointHit> CheckWatchpoints();
|
||||||
|
|
||||||
|
// send watchpoints that hit and enter command wait loop
|
||||||
|
void SendWatchpointsAndSuspend(const std::list<WatchpointHit> &points);
|
||||||
|
|
||||||
|
// class members
|
||||||
|
std::unique_ptr<GrpcClient> grpc_client_;
|
||||||
|
std::unique_ptr<DebugServices> debug_services_;
|
||||||
|
KernelGraphPtr graph_ptr_;
|
||||||
|
uint32_t device_id_;
|
||||||
|
int32_t num_step_;
|
||||||
|
bool debugger_enabled_;
|
||||||
|
bool is_dataset_graph_;
|
||||||
|
std::mutex access_lock_;
|
||||||
|
|
||||||
|
// singleton
|
||||||
|
static std::mutex instance_lock_;
|
||||||
|
static std::shared_ptr<Debugger> debugger_;
|
||||||
|
};
|
||||||
|
|
||||||
|
using DebuggerPtr = std::shared_ptr<Debugger>;
|
||||||
|
|
||||||
|
// get debugger ModelProto
|
||||||
|
std::string GetDebuggerFuncGraphProtoString(const FuncGraphPtr &func_graph);
|
||||||
|
ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph);
|
||||||
|
|
||||||
|
// for getting proto DataType from Type of Tensor
|
||||||
|
DataType GetDebuggerNumberDataType(const TypePtr &type);
|
||||||
|
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_DEBUG_DEBUGGER_DEBUGGER_H_
|
|
@ -0,0 +1,124 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
#include "debug/debugger/grpc_client.h"
|
||||||
|
#include "utils/log_adapter.h"
|
||||||
|
|
||||||
|
using debugger::EventListener;
|
||||||
|
using debugger::EventReply;
|
||||||
|
using debugger::EventReply_Status_FAILED;
|
||||||
|
using debugger::GraphProto;
|
||||||
|
using debugger::Metadata;
|
||||||
|
using debugger::TensorProto;
|
||||||
|
using debugger::WatchpointHit;
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
// Construct and immediately connect to host:port (see Init).
GrpcClient::GrpcClient(const std::string &host, const std::string &port) : stub_(nullptr) { Init(host, port); }
|
||||||
|
|
||||||
|
void GrpcClient::Init(const std::string &host, const std::string &port) {
|
||||||
|
std::string target_str = host + ":" + port;
|
||||||
|
MS_LOG(INFO) << "GrpcClient connecting to: " << target_str;
|
||||||
|
|
||||||
|
std::shared_ptr<grpc::Channel> channel = grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials());
|
||||||
|
stub_ = EventListener::NewStub(channel);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop the stub (and with it the channel), disconnecting from the server.
void GrpcClient::Reset() { stub_ = nullptr; }
|
||||||
|
|
||||||
|
// Blocking RPC: ask the debugger client for the next command.
// On transport failure the returned reply carries status FAILED.
EventReply GrpcClient::WaitForCommand(const Metadata &metadata) {
  EventReply reply;
  grpc::ClientContext context;
  const grpc::Status status = stub_->WaitCMD(&context, metadata, &reply);

  if (!status.ok()) {
    MS_LOG(ERROR) << "RPC failed: WaitForCommand";
    MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
    reply.set_status(EventReply_Status_FAILED);
  }
  return reply;
}
|
||||||
|
|
||||||
|
// Unary RPC: push session metadata to the debugger client.
// On transport failure the returned reply carries status FAILED.
EventReply GrpcClient::SendMetadata(const Metadata &metadata) {
  EventReply reply;
  grpc::ClientContext context;
  const grpc::Status status = stub_->SendMetadata(&context, metadata, &reply);

  if (!status.ok()) {
    MS_LOG(ERROR) << "RPC failed: SendMetadata";
    MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
    reply.set_status(EventReply_Status_FAILED);
  }
  return reply;
}
|
||||||
|
|
||||||
|
// Unary RPC: push the serialized graph to the debugger client.
// On transport failure the returned reply carries status FAILED.
EventReply GrpcClient::SendGraph(const GraphProto &graph) {
  EventReply reply;
  grpc::ClientContext context;
  const grpc::Status status = stub_->SendGraph(&context, graph, &reply);

  if (!status.ok()) {
    MS_LOG(ERROR) << "RPC failed: SendGraph";
    MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
    reply.set_status(EventReply_Status_FAILED);
  }
  return reply;
}
|
||||||
|
|
||||||
|
// Client-streaming RPC: write each tensor proto to the server in turn.
// Stops early if the stream breaks; the final status is read via Finish().
EventReply GrpcClient::SendTensors(const std::list<TensorProto> &tensors) {
  EventReply reply;
  grpc::ClientContext context;

  std::unique_ptr<grpc::ClientWriter<TensorProto> > writer(stub_->SendTensors(&context, &reply));
  for (const auto &tensor : tensors) {
    // Write returns false once the stream is broken; no point continuing.
    if (!writer->Write(tensor)) {
      break;
    }
    // 1 ms pacing between writes — presumably to avoid flooding the server;
    // NOTE(review): confirm this delay is still required before removing it.
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }
  writer->WritesDone();
  const grpc::Status status = writer->Finish();

  if (!status.ok()) {
    MS_LOG(ERROR) << "RPC failed: SendTensors";
    MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
    reply.set_status(EventReply_Status_FAILED);
  }
  return reply;
}
|
||||||
|
|
||||||
|
// Client-streaming RPC: report each watchpoint hit to the server in turn.
// Stops early if the stream breaks; the final status is read via Finish().
EventReply GrpcClient::SendWatchpointHits(const std::list<WatchpointHit> &watchpoints) {
  EventReply reply;
  grpc::ClientContext context;

  std::unique_ptr<grpc::ClientWriter<WatchpointHit> > writer(stub_->SendWatchpointHits(&context, &reply));
  for (const auto &watchpoint : watchpoints) {
    // Write returns false once the stream is broken; no point continuing.
    if (!writer->Write(watchpoint)) {
      break;
    }
    // 1 ms pacing between writes — presumably to avoid flooding the server;
    // NOTE(review): confirm this delay is still required before removing it.
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }
  writer->WritesDone();
  const grpc::Status status = writer->Finish();

  if (!status.ok()) {
    MS_LOG(ERROR) << "RPC failed: SendWatchpointHits";
    MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
    reply.set_status(EventReply_Status_FAILED);
  }
  return reply;
}
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,61 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_DEBUG_DEBUGGER_GRPC_CLIENT_H_
|
||||||
|
#define MINDSPORE_CCSRC_DEBUG_DEBUGGER_GRPC_CLIENT_H_
|
||||||
|
|
||||||
|
#include <grpcpp/grpcpp.h>
|
||||||
|
#include <string>
|
||||||
|
#include <list>
|
||||||
|
#include <memory>
|
||||||
|
#include "proto/debug_grpc.grpc.pb.h"
|
||||||
|
|
||||||
|
using debugger::EventListener;
|
||||||
|
using debugger::EventReply;
|
||||||
|
using debugger::GraphProto;
|
||||||
|
using debugger::Metadata;
|
||||||
|
using debugger::TensorProto;
|
||||||
|
using debugger::WatchpointHit;
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
class GrpcClient {
|
||||||
|
public:
|
||||||
|
// constructor
|
||||||
|
GrpcClient(const std::string &host, const std::string &port);
|
||||||
|
|
||||||
|
// deconstructor
|
||||||
|
~GrpcClient() = default;
|
||||||
|
|
||||||
|
// init
|
||||||
|
void Init(const std::string &host, const std::string &port);
|
||||||
|
|
||||||
|
// reset
|
||||||
|
void Reset();
|
||||||
|
|
||||||
|
EventReply WaitForCommand(const Metadata &metadata);
|
||||||
|
|
||||||
|
EventReply SendMetadata(const Metadata &metadata);
|
||||||
|
|
||||||
|
EventReply SendGraph(const GraphProto &graph);
|
||||||
|
|
||||||
|
EventReply SendTensors(const std::list<TensorProto> &tensors);
|
||||||
|
|
||||||
|
EventReply SendWatchpointHits(const std::list<WatchpointHit> &watchpoints);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<EventListener::Stub> stub_;
|
||||||
|
};
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_DEBUG_DEBUGGER_GRPC_CLIENT_H_
|
|
@ -0,0 +1,542 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <utility>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "debug/debugger/debugger.h"
|
||||||
|
#include "proto/debug_graph.pb.h"
|
||||||
|
#include "utils/graph_utils.h"
|
||||||
|
#include "utils/symbolic.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
class DebuggerProtoExporter {
|
||||||
|
public:
|
||||||
|
DebuggerProtoExporter() {}
|
||||||
|
~DebuggerProtoExporter() {}
|
||||||
|
|
||||||
|
std::string GetFuncGraphProtoString(const FuncGraphPtr &func_graph);
|
||||||
|
debugger::ModelProto GetFuncGraphProto(const FuncGraphPtr &func_graph);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void InitModelInfo();
|
||||||
|
void GetOpNodeTypeAndAttrs(const FuncGraphPtr &func_graph, const AnfNodePtr &node, debugger::NodeProto *node_proto);
|
||||||
|
std::string GetOpNodeInputId(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
|
||||||
|
const std::map<AnfNodePtr, size_t> &apply_map,
|
||||||
|
std::map<AnfNodePtr, size_t> *const_map_ptr);
|
||||||
|
void SetValueToProto(const ValuePtr &attr_value, debugger::ValueProto *value_proto);
|
||||||
|
void SetScalarToProto(const ScalarPtr &val, debugger::ValueProto *value_proto);
|
||||||
|
void SetSequenceToProto(const ValueSequeuePtr &val, debugger::ValueProto *value_proto);
|
||||||
|
void SetDictionaryToProto(const ValueDictionaryPtr &val, debugger::ValueProto *value_proto);
|
||||||
|
void SetNodeOutputType(const AnfNodePtr &node, debugger::TypeProto *type_proto);
|
||||||
|
void SetNodeOutputType(const TypePtr &node, const BaseShapePtr &shape, debugger::TypeProto *type_proto);
|
||||||
|
|
||||||
|
void ExportFuncGraph(const FuncGraphPtr &func_graph, debugger::GraphProto *graph_proto);
|
||||||
|
void ExportParameters(const FuncGraphPtr &func_graph, debugger::GraphProto *graph_proto);
|
||||||
|
void ExportCNodes(const FuncGraphPtr &func_graph, debugger::GraphProto *graph_proto,
|
||||||
|
std::map<AnfNodePtr, size_t> *const_map_ptr);
|
||||||
|
void ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *apply_map_ptr,
|
||||||
|
std::map<AnfNodePtr, size_t> *const_map_ptr, debugger::GraphProto *graph_proto);
|
||||||
|
void ExportFuncGraphOutput(const FuncGraphPtr &func_graph, const CNodePtr &ret_node,
|
||||||
|
const std::map<AnfNodePtr, size_t> &apply_map, std::map<AnfNodePtr, size_t> *const_map_ptr,
|
||||||
|
debugger::GraphProto *graph_proto);
|
||||||
|
void ExportValueNodes(const std::map<AnfNodePtr, size_t> &const_map, debugger::GraphProto *graph_proto);
|
||||||
|
|
||||||
|
static std::string GetConstNodeId(size_t idx) { return std::string("cst") + std::to_string(idx); }
|
||||||
|
|
||||||
|
debugger::ModelProto model_;
|
||||||
|
};
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::SetNodeOutputType(const TypePtr &type, const BaseShapePtr &shape,
|
||||||
|
debugger::TypeProto *type_proto) {
|
||||||
|
if (type_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type == nullptr) {
|
||||||
|
type_proto->set_data_type(debugger::DT_UNDEFINED);
|
||||||
|
} else if (type->isa<Number>()) {
|
||||||
|
type_proto->set_data_type(GetDebuggerNumberDataType(type));
|
||||||
|
} else if (type->isa<TensorType>()) {
|
||||||
|
TypePtr elem_type = dyn_cast<TensorType>(type)->element();
|
||||||
|
type_proto->mutable_tensor_type()->set_elem_type(GetDebuggerNumberDataType(elem_type));
|
||||||
|
type_proto->set_data_type(debugger::DT_TENSOR);
|
||||||
|
if (shape != nullptr && shape->isa<abstract::Shape>()) {
|
||||||
|
abstract::ShapePtr shape_info = dyn_cast<abstract::Shape>(shape);
|
||||||
|
for (const auto &elem : shape_info->shape()) {
|
||||||
|
type_proto->mutable_tensor_type()->mutable_shape()->add_dim()->set_size(elem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (type->isa<Tuple>()) {
|
||||||
|
TuplePtr tuple_type = dyn_cast<Tuple>(type);
|
||||||
|
type_proto->set_data_type(debugger::DT_TUPLE);
|
||||||
|
for (const auto &elem_type : tuple_type->elements()) {
|
||||||
|
SetNodeOutputType(elem_type, nullptr, type_proto->mutable_sequence_type()->add_elem_types());
|
||||||
|
}
|
||||||
|
} else if (type->isa<TypeType>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_TYPE);
|
||||||
|
} else if (type->isa<List>()) {
|
||||||
|
ListPtr list_type = dyn_cast<List>(type);
|
||||||
|
type_proto->set_data_type(debugger::DT_LIST);
|
||||||
|
for (const auto &elem_type : list_type->elements()) {
|
||||||
|
SetNodeOutputType(elem_type, nullptr, type_proto->mutable_sequence_type()->add_elem_types());
|
||||||
|
}
|
||||||
|
} else if (type->isa<TypeAnything>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_ANYTHING);
|
||||||
|
} else if (type->isa<RefKeyType>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_REFKEY);
|
||||||
|
} else if (type->isa<RefType>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_REF);
|
||||||
|
} else if (type->isa<Function>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_GRAPH);
|
||||||
|
} else if (type->isa<TypeNone>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_NONE);
|
||||||
|
} else if (type->isa<String>()) {
|
||||||
|
type_proto->set_data_type(debugger::DT_STRING);
|
||||||
|
} else if (type->isa<SymbolicKeyType>()) {
|
||||||
|
// Do Nothing.
|
||||||
|
} else {
|
||||||
|
MS_LOG(EXCEPTION) << "Unknown type: " << type->type_name();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::SetNodeOutputType(const AnfNodePtr &node, debugger::TypeProto *type_proto) {
|
||||||
|
if (node == nullptr || type_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
SetNodeOutputType(node->Type(), node->Shape(), type_proto);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::SetValueToProto(const ValuePtr &val, debugger::ValueProto *value_proto) {
|
||||||
|
if (val == nullptr || value_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val->isa<StringImm>()) {
|
||||||
|
const StringImmPtr &value = dyn_cast<StringImm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_STRING);
|
||||||
|
value_proto->set_str_val(value->value());
|
||||||
|
} else if (val->isa<Scalar>()) {
|
||||||
|
SetScalarToProto(dyn_cast<Scalar>(val), value_proto);
|
||||||
|
} else if (val->isa<Bool>()) {
|
||||||
|
value_proto->set_dtype(debugger::DT_TYPE);
|
||||||
|
value_proto->mutable_type_val()->set_data_type(debugger::DT_BOOL);
|
||||||
|
} else if (val->isa<Int>()) {
|
||||||
|
value_proto->set_dtype(debugger::DT_TYPE);
|
||||||
|
value_proto->mutable_type_val()->set_data_type(debugger::DT_BASE_INT);
|
||||||
|
} else if (val->isa<Float>()) {
|
||||||
|
value_proto->set_dtype(debugger::DT_TYPE);
|
||||||
|
value_proto->mutable_type_val()->set_data_type(debugger::DT_BASE_FLOAT);
|
||||||
|
} else if (val->isa<ValueSequeue>()) {
|
||||||
|
SetSequenceToProto(dyn_cast<ValueSequeue>(val), value_proto);
|
||||||
|
} else if (val->isa<None>()) {
|
||||||
|
value_proto->set_dtype(debugger::DT_NONE);
|
||||||
|
value_proto->set_str_val("None");
|
||||||
|
} else if (val->isa<SymbolicKeyInstance>()) {
|
||||||
|
SymbolicKeyInstancePtr sym_inst = dyn_cast<SymbolicKeyInstance>(val);
|
||||||
|
ParameterPtr sym_node = dyn_cast<Parameter>(sym_inst->node());
|
||||||
|
value_proto->set_dtype(debugger::DT_SYM_INST);
|
||||||
|
value_proto->set_str_val(sym_node == nullptr ? std::string("nullptr") : sym_node->ToString());
|
||||||
|
} else if (val->isa<ValueDictionary>()) {
|
||||||
|
SetDictionaryToProto(dyn_cast<ValueDictionary>(val), value_proto);
|
||||||
|
} else if (val->isa<tensor::Tensor>()) {
|
||||||
|
tensor::TensorPtr tensor_ptr = dyn_cast<tensor::Tensor>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_TENSOR);
|
||||||
|
debugger::TensorProto *tensor_proto = value_proto->mutable_tensor_val();
|
||||||
|
tensor_proto->set_data_type(GetDebuggerNumberDataType(tensor_ptr->Dtype()));
|
||||||
|
for (auto &elem : tensor_ptr->shape()) {
|
||||||
|
tensor_proto->add_dims(elem);
|
||||||
|
}
|
||||||
|
tensor_proto->set_tensor_content(tensor_ptr->data_c(), tensor_ptr->data().nbytes());
|
||||||
|
} else if (val->isa<TensorType>()) {
|
||||||
|
value_proto->set_dtype(debugger::DT_TYPE);
|
||||||
|
|
||||||
|
debugger::TypeProto *type_proto = value_proto->mutable_type_val();
|
||||||
|
type_proto->set_data_type(debugger::DT_TENSOR);
|
||||||
|
TypePtr elem_type = dyn_cast<TensorType>(val)->element();
|
||||||
|
type_proto->mutable_tensor_type()->set_elem_type(GetDebuggerNumberDataType(elem_type));
|
||||||
|
} else {
|
||||||
|
MS_LOG(WARNING) << "Unsupported type " << val->type_name();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::SetScalarToProto(const ScalarPtr &val, debugger::ValueProto *value_proto) {
|
||||||
|
if (val == nullptr || value_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val->isa<BoolImm>()) {
|
||||||
|
const BoolImmPtr &value = dyn_cast<BoolImm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_BOOL);
|
||||||
|
value_proto->set_bool_val(value->value());
|
||||||
|
} else if (val->isa<Int8Imm>()) {
|
||||||
|
const Int8ImmPtr &value = dyn_cast<Int8Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_INT8);
|
||||||
|
value_proto->set_int_val(value->value());
|
||||||
|
} else if (val->isa<Int16Imm>()) {
|
||||||
|
const Int16ImmPtr &value = dyn_cast<Int16Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_INT16);
|
||||||
|
value_proto->set_int_val(value->value());
|
||||||
|
} else if (val->isa<Int32Imm>()) {
|
||||||
|
const Int32ImmPtr &value = dyn_cast<Int32Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_INT32);
|
||||||
|
value_proto->set_int_val(value->value());
|
||||||
|
} else if (val->isa<Int64Imm>()) {
|
||||||
|
const Int64ImmPtr &value = dyn_cast<Int64Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_INT64);
|
||||||
|
value_proto->set_int_val(value->value());
|
||||||
|
} else if (val->isa<UInt8Imm>()) {
|
||||||
|
const UInt8ImmPtr &value = dyn_cast<UInt8Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_UINT8);
|
||||||
|
value_proto->set_uint_val(value->value());
|
||||||
|
} else if (val->isa<UInt16Imm>()) {
|
||||||
|
const UInt16ImmPtr &value = dyn_cast<UInt16Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_UINT16);
|
||||||
|
value_proto->set_uint_val(value->value());
|
||||||
|
} else if (val->isa<UInt32Imm>()) {
|
||||||
|
const UInt32ImmPtr &value = dyn_cast<UInt32Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_UINT32);
|
||||||
|
value_proto->set_uint_val(value->value());
|
||||||
|
} else if (val->isa<UInt64Imm>()) {
|
||||||
|
const UInt64ImmPtr &value = dyn_cast<UInt64Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_UINT64);
|
||||||
|
value_proto->set_uint_val(value->value());
|
||||||
|
} else if (val->isa<FP32Imm>()) {
|
||||||
|
const FP32ImmPtr &value = dyn_cast<FP32Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_FLOAT32);
|
||||||
|
value_proto->set_float_val(value->value());
|
||||||
|
} else if (val->isa<FP64Imm>()) {
|
||||||
|
const FP64ImmPtr &value = dyn_cast<FP64Imm>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_FLOAT64);
|
||||||
|
value_proto->set_double_val(value->value());
|
||||||
|
} else {
|
||||||
|
MS_LOG(EXCEPTION) << "Unknown scalar type " << val->ToString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::SetSequenceToProto(const ValueSequeuePtr &val, debugger::ValueProto *value_proto) {
|
||||||
|
if (val == nullptr || value_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val->isa<ValueTuple>()) {
|
||||||
|
const ValueTuplePtr &value = dyn_cast<ValueTuple>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_TUPLE);
|
||||||
|
for (const auto &item : value->value()) {
|
||||||
|
SetValueToProto(item, value_proto->add_values());
|
||||||
|
}
|
||||||
|
} else if (val->isa<ValueList>()) {
|
||||||
|
const ValueListPtr &value = dyn_cast<ValueList>(val);
|
||||||
|
value_proto->set_dtype(debugger::DT_LIST);
|
||||||
|
for (const auto &item : value->value()) {
|
||||||
|
SetValueToProto(item, value_proto->add_values());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::SetDictionaryToProto(const ValueDictionaryPtr &val, debugger::ValueProto *value_proto) {
|
||||||
|
if (val == nullptr || value_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
value_proto->set_dtype(debugger::DT_DICT);
|
||||||
|
for (const auto &item : val->value()) {
|
||||||
|
debugger::NamedValueProto *named_val = value_proto->add_dict_val();
|
||||||
|
named_val->set_key(item.first);
|
||||||
|
SetValueToProto(item.second, named_val->mutable_value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::GetOpNodeTypeAndAttrs(const FuncGraphPtr &, const AnfNodePtr &node,
|
||||||
|
debugger::NodeProto *node_proto) {
|
||||||
|
if (node == nullptr || node_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->isa<CNode>() || node->isa<Parameter>() || IsValueNode<FuncGraph>(node)) {
|
||||||
|
MS_LOG(EXCEPTION) << "Op node can not be CNode, Parameter or ValueNode Graph. But got " << node->ToString();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!IsValueNode<Primitive>(node)) {
|
||||||
|
MS_LOG(EXCEPTION) << "Op node is not primitive: " << node->ToString();
|
||||||
|
}
|
||||||
|
|
||||||
|
const PrimitivePtr &prim = GetValueNode<PrimitivePtr>(node);
|
||||||
|
node_proto->set_op_type(prim->name());
|
||||||
|
for (const auto &attr : prim->attrs()) {
|
||||||
|
debugger::AttributeProto *attr_proto = node_proto->add_attribute();
|
||||||
|
attr_proto->set_name(attr.first);
|
||||||
|
SetValueToProto(attr.second, attr_proto->mutable_value());
|
||||||
|
}
|
||||||
|
node_proto->set_scope(node->scope()->name());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string DebuggerProtoExporter::GetOpNodeInputId(const FuncGraphPtr &, const AnfNodePtr &node,
|
||||||
|
const std::map<AnfNodePtr, size_t> &apply_map,
|
||||||
|
std::map<AnfNodePtr, size_t> *const_map_ptr) {
|
||||||
|
if (node == nullptr || const_map_ptr == nullptr) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->isa<CNode>()) {
|
||||||
|
auto iter = apply_map.find(node);
|
||||||
|
if (iter == apply_map.end()) {
|
||||||
|
MS_LOG(EXCEPTION) << "Can not find node '" << node->ToString() << "' in apply_map";
|
||||||
|
}
|
||||||
|
return std::to_string(iter->second);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->isa<Parameter>()) {
|
||||||
|
return node->ToString();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->isa<ValueNode>()) {
|
||||||
|
auto iter = const_map_ptr->find(node);
|
||||||
|
if (iter == const_map_ptr->end()) {
|
||||||
|
// Start index number from 1
|
||||||
|
auto const_idx = const_map_ptr->size() + 1;
|
||||||
|
(*const_map_ptr)[node] = const_idx;
|
||||||
|
}
|
||||||
|
return GetConstNodeId((*const_map_ptr)[node]);
|
||||||
|
}
|
||||||
|
|
||||||
|
MS_LOG(EXCEPTION) << "Unknown node type. node is '" << node->ToString() << "'";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string DebuggerProtoExporter::GetFuncGraphProtoString(const FuncGraphPtr &func_graph) {
|
||||||
|
if (func_graph == nullptr) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
InitModelInfo();
|
||||||
|
debugger::GraphProto *graph_proto = model_.mutable_graph();
|
||||||
|
ExportFuncGraph(func_graph, graph_proto);
|
||||||
|
return model_.SerializeAsString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize func_graph and return the model proto by value.
// Returns a default-constructed ModelProto for a null graph.
debugger::ModelProto DebuggerProtoExporter::GetFuncGraphProto(const FuncGraphPtr &func_graph) {
  if (func_graph == nullptr) {
    return ModelProto();
  }

  InitModelInfo();
  debugger::GraphProto *graph_proto = model_.mutable_graph();
  ExportFuncGraph(func_graph, graph_proto);
  return model_;
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::ExportFuncGraph(const FuncGraphPtr &func_graph, debugger::GraphProto *graph_proto) {
|
||||||
|
if (func_graph == nullptr || graph_proto == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// map for store ValueNodes of this graph
|
||||||
|
std::map<AnfNodePtr, size_t> const_map;
|
||||||
|
|
||||||
|
// set graph name
|
||||||
|
graph_proto->set_name(func_graph->ToString());
|
||||||
|
|
||||||
|
ExportParameters(func_graph, graph_proto);
|
||||||
|
|
||||||
|
ExportCNodes(func_graph, graph_proto, &const_map);
|
||||||
|
|
||||||
|
ExportValueNodes(const_map, graph_proto);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits one ParameterProto (name + output type) per graph input.
void DebuggerProtoExporter::ExportParameters(const FuncGraphPtr &func_graph, debugger::GraphProto *graph_proto) {
  if (func_graph == nullptr || graph_proto == nullptr) {
    return;
  }

  // cast FuncGraph to KernelGraph to access inputs()
  // NOTE(review): static_cast assumes every func_graph reaching here really is
  // a session::KernelGraph — no runtime check; confirm callers guarantee this.
  std::vector<AnfNodePtr> parameters = static_cast<session::KernelGraph *>(func_graph.get())->inputs();

  for (auto &param : parameters) {
    debugger::ParameterProto *param_proto = graph_proto->add_parameters();
    param_proto->set_name(param->ToString());

    SetNodeOutputType(param, param_proto->mutable_type());

    // The cast result is only used for validation: every input must actually
    // be a Parameter node, otherwise export aborts.
    const ParameterPtr param_ptr = dyn_cast<Parameter>(param);
    if (param_ptr == nullptr) {
      MS_LOG(EXCEPTION) << "Parameter '" << param->ToString() << "' could not cast to parameter.";
    }
  }
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::ExportCNodes(const FuncGraphPtr &func_graph, debugger::GraphProto *graph_proto,
|
||||||
|
std::map<AnfNodePtr, size_t> *const_map_ptr) {
|
||||||
|
if (func_graph == nullptr || graph_proto == nullptr || const_map_ptr == nullptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// topo sort nodes
|
||||||
|
std::vector<AnfNodePtr> nodes = TopoSort(func_graph->get_return(), SuccIncoming, AlwaysInclude);
|
||||||
|
std::map<AnfNodePtr, size_t> apply_map;
|
||||||
|
for (const AnfNodePtr &node : nodes) {
|
||||||
|
MS_EXCEPTION_IF_NULL(node);
|
||||||
|
if (!node->isa<CNode>()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto cnode = node->cast<CNodePtr>();
|
||||||
|
if (cnode != func_graph->get_return()) {
|
||||||
|
ExportCNode(func_graph, cnode, &apply_map, const_map_ptr, graph_proto);
|
||||||
|
} else {
|
||||||
|
ExportFuncGraphOutput(func_graph, cnode, apply_map, const_map_ptr, graph_proto);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits one NodeProto for `node`: operator type/attrs, a numeric name (its
// 1-based export index, recorded in apply_map so later nodes can reference it
// as an input), scope, debug name, data-edge inputs, and the output type.
void DebuggerProtoExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                        std::map<AnfNodePtr, size_t> *apply_map_ptr,
                                        std::map<AnfNodePtr, size_t> *const_map_ptr,
                                        debugger::GraphProto *graph_proto) {
  if (func_graph == nullptr || node == nullptr || apply_map_ptr == nullptr || const_map_ptr == nullptr ||
      graph_proto == nullptr) {
    return;
  }

  // Assign this cnode the next 1-based index; GetOpNodeInputId later resolves
  // references to it through apply_map.
  auto apply_idx = apply_map_ptr->size() + 1;
  (*apply_map_ptr)[node] = apply_idx;

  auto &inputs = node->inputs();
  if (inputs.size() < 1) {
    MS_LOG(EXCEPTION) << "Inputs of apply node is empty";
  }
  // inputs[0] is the operator itself; the remaining entries are operands.
  AnfNodePtr op = inputs[0];
  debugger::NodeProto *node_proto = graph_proto->add_node();

  // CNode/ConstGraph/Const/Parameter
  // NOTE(review): when the operator is not a primitive, node_proto has already
  // been added but is left completely empty — confirm downstream consumers
  // tolerate such placeholder nodes.
  if (op->isa<CNode>() || IsValueNode<FuncGraph>(op) || op->isa<Parameter>()) {
    MS_LOG(WARNING) << "Operator must be a primitive";
  } else {
    GetOpNodeTypeAndAttrs(func_graph, op, node_proto);
    node_proto->set_name(std::to_string(apply_idx));
    node_proto->set_scope(node->scope()->name());

    // add debug_name for debugger
    node_proto->set_debug_name(node->fullname_with_scope());

    // process OP inputs (skipping inputs[0], the operator)
    for (size_t i = 1; i < inputs.size(); ++i) {
      debugger::InputProto *input_proto = node_proto->add_input();
      input_proto->set_type(debugger::InputProto_EdgeType_DATA_EDGE);
      std::string id = GetOpNodeInputId(func_graph, inputs[i], *apply_map_ptr, const_map_ptr);
      input_proto->set_name(id);
    }

    // set node output type
    SetNodeOutputType(node, node_proto->mutable_output_type());
  }
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::ExportFuncGraphOutput(const FuncGraphPtr &func_graph, const CNodePtr &ret_node,
|
||||||
|
const std::map<AnfNodePtr, size_t> &apply_map,
|
||||||
|
std::map<AnfNodePtr, size_t> *const_map_ptr,
|
||||||
|
debugger::GraphProto *graph_proto) {
|
||||||
|
if (ret_node == nullptr || !ret_node->isa<CNode>()) {
|
||||||
|
MS_LOG(EXCEPTION) << "Graph return node is illegal";
|
||||||
|
}
|
||||||
|
AnfNodePtr arg = ret_node->input(1);
|
||||||
|
if (graph_proto == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "graph_proto is nullptr";
|
||||||
|
}
|
||||||
|
debugger::OutputProto *output_proto = graph_proto->add_outputs();
|
||||||
|
if (output_proto == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "output_proto is nullptr";
|
||||||
|
}
|
||||||
|
std::string id = GetOpNodeInputId(func_graph, arg, apply_map, const_map_ptr);
|
||||||
|
output_proto->set_name(id);
|
||||||
|
SetNodeOutputType(arg, output_proto->mutable_type());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Orders (value-node, id) pairs ascending by their assigned constant id so
// ExportValueNodes emits constants in first-use order.
static bool CompareValue(const std::pair<AnfNodePtr, size_t> &x, const std::pair<AnfNodePtr, size_t> &y) {
  return x.second < y.second;
}
|
||||||
|
|
||||||
|
void DebuggerProtoExporter::ExportValueNodes(const std::map<AnfNodePtr, size_t> &const_map,
|
||||||
|
debugger::GraphProto *graph_proto) {
|
||||||
|
std::vector<std::pair<AnfNodePtr, size_t>> nodes;
|
||||||
|
(void)std::transform(const_map.cbegin(), const_map.cend(), std::back_inserter(nodes),
|
||||||
|
[](const std::pair<AnfNodePtr, size_t> &item) { return item; });
|
||||||
|
|
||||||
|
sort(nodes.begin(), nodes.end(), CompareValue);
|
||||||
|
|
||||||
|
for (auto &item : nodes) {
|
||||||
|
if (graph_proto == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "graph_proto is nullptr";
|
||||||
|
}
|
||||||
|
debugger::NamedValueProto *named_value = graph_proto->add_const_vals();
|
||||||
|
MS_EXCEPTION_IF_NULL(named_value);
|
||||||
|
named_value->set_key(GetConstNodeId(item.second));
|
||||||
|
SetValueToProto(GetValueNode(item.first), named_value->mutable_value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stamps the debugger IR version on the model before any graph data is filled in.
void DebuggerProtoExporter::InitModelInfo() { model_.set_ir_version(debugger::IR_VERSION); }
|
||||||
|
|
||||||
|
std::string GetDebuggerFuncGraphProtoString(const FuncGraphPtr &func_graph) {
|
||||||
|
DebuggerProtoExporter exporter;
|
||||||
|
return exporter.GetFuncGraphProtoString(func_graph);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience wrapper: builds a one-shot exporter and returns the in-memory
// debugger ModelProto for `func_graph`.
debugger::ModelProto GetDebuggerFuncGraphProto(const FuncGraphPtr &func_graph) {
  DebuggerProtoExporter proto_exporter;
  return proto_exporter.GetFuncGraphProto(func_graph);
}
|
||||||
|
|
||||||
|
// Maps a MindSpore numeric TypeId onto the debugger proto DataType enum.
// Throws via MS_LOG(EXCEPTION) for any non-numeric / unsupported type.
debugger::DataType GetDebuggerNumberDataType(const TypePtr &type) {
  switch (type->type_id()) {
    case kNumberTypeBool:
      return debugger::DT_BOOL;
    case kNumberTypeInt8:
      return debugger::DT_INT8;
    case kNumberTypeInt16:
      return debugger::DT_INT16;
    case kNumberTypeInt32:
      return debugger::DT_INT32;
    case kNumberTypeInt64:
      return debugger::DT_INT64;
    case kNumberTypeUInt8:
      return debugger::DT_UINT8;
    case kNumberTypeUInt16:
      return debugger::DT_UINT16;
    case kNumberTypeUInt32:
      return debugger::DT_UINT32;
    case kNumberTypeUInt64:
      return debugger::DT_UINT64;
    case kNumberTypeFloat16:
      return debugger::DT_FLOAT16;
    case kNumberTypeFloat32:
      return debugger::DT_FLOAT32;
    case kNumberTypeFloat64:
      return debugger::DT_FLOAT64;
    // Width-unspecified number types map to the "base" debugger variants.
    case kNumberTypeInt:
      return debugger::DT_BASE_INT;
    case kNumberTypeUInt:
      return debugger::DT_BASE_UINT;
    case kNumberTypeFloat:
      return debugger::DT_BASE_FLOAT;
    default:
      MS_LOG(EXCEPTION) << "Unexpected type " << type->type_name();
  }
}
|
||||||
|
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,75 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
|
||||||
|
#define MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
#include "ir/tensor.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
class TensorData {
|
||||||
|
private:
|
||||||
|
mindspore::tensor::TensorPtr tensor_ptr;
|
||||||
|
std::string name;
|
||||||
|
size_t slot;
|
||||||
|
int execution_order;
|
||||||
|
|
||||||
|
public:
|
||||||
|
TensorData() : slot(0), execution_order(-1) {}
|
||||||
|
|
||||||
|
TensorData(const TensorData &obj) {
|
||||||
|
std::cout << "Copy Constructor" << std::endl;
|
||||||
|
this->name = obj.name;
|
||||||
|
this->execution_order = obj.execution_order;
|
||||||
|
this->slot = obj.slot;
|
||||||
|
this->tensor_ptr = obj.tensor_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
~TensorData() {}
|
||||||
|
|
||||||
|
std::string GetName() { return this->name; }
|
||||||
|
|
||||||
|
mindspore::tensor::TensorPtr GetTensor() { return this->tensor_ptr; }
|
||||||
|
|
||||||
|
size_t GetSlot() { return this->slot; }
|
||||||
|
|
||||||
|
int GetExecutionOrder() { return this->execution_order; }
|
||||||
|
|
||||||
|
int SetExecutionOrder(int execution_order) {
|
||||||
|
this->execution_order = execution_order;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int SetName(const std::string &name) {
|
||||||
|
this->name = name;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SetTensor(mindspore::tensor::TensorPtr out_tensor) {
|
||||||
|
this->tensor_ptr = out_tensor;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SetSlot(size_t slot) {
|
||||||
|
this->slot = slot;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_DEBUG_TENSOR_DATA_H_
|
|
@ -0,0 +1,69 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
|
||||||
|
#define MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <tuple>
|
||||||
|
#include <string>
|
||||||
|
#include "debug/tensor_data.h"
|
||||||
|
namespace mindspore {
|
||||||
|
// In-memory store for tensors captured during a debugger step. Tensors are
// kept both in capture order (tensor_list) and indexed by name
// (tensor_list_map) for lookup from the debugger protocol.
class TensorLoader {
 public:
  // NOTE(review): iter_num is uint32_t, so initializing with -1 wraps to
  // UINT32_MAX — presumably a "no iteration yet" sentinel; confirm.
  TensorLoader() : iter_num(-1) {}

  ~TensorLoader() {}

  // Records a captured tensor under its name.
  // NOTE(review): map::insert keeps the FIRST entry for a duplicate name, so
  // a re-captured tensor will not replace the old one in tensor_list_map;
  // verify this is intended.
  bool LoadNewTensor(std::shared_ptr<TensorData> tensor) {
    tensor_list.push_back(tensor);
    tensor_list_map.insert({tensor->GetName(), tensor});
    return true;
  }
  // Returns a copy of the capture-ordered tensor list.
  std::vector<std::shared_ptr<TensorData>> GetTensor() { return tensor_list; }

  uint32_t GetIterNum() { return iter_num; }

  // Returns a copy of the name -> tensor index.
  std::map<std::string, std::shared_ptr<TensorData>> GetTensorMap() { return tensor_list_map; }
  // Resolves each requested name to its tensor; missing names yield a
  // (name, nullptr) tuple so callers can report them individually.
  void SearchTensors(const std::vector<std::string> &search_list,
                     std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) {
    for (auto i : search_list) {
      std::map<std::string, std::shared_ptr<TensorData>>::iterator iter;
      iter = tensor_list_map.find(i);
      if (iter != tensor_list_map.end()) {
        result_list->push_back(std::make_tuple(i, iter->second));
      } else {
        result_list->push_back(std::make_tuple(i, nullptr));
      }
    }
  }

  // Drops all captured tensors (e.g. between iterations).
  bool EmptyTensor() {
    tensor_list_map.clear();
    tensor_list.clear();
    return true;
  }

  void set_iter_num(uint32_t iter_num) { this->iter_num = iter_num; }

 private:
  std::vector<std::shared_ptr<TensorData>> tensor_list;                // capture order
  std::map<std::string, std::shared_ptr<TensorData>> tensor_list_map;  // name -> tensor
  uint32_t iter_num;                                                   // current training iteration
};
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_DEBUG_TENSOR_LOAD_H_
|
|
@ -30,6 +30,10 @@
|
||||||
#ifdef ENABLE_DUMP_E2E
|
#ifdef ENABLE_DUMP_E2E
|
||||||
#include "debug/e2e_dump.h"
|
#include "debug/e2e_dump.h"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
#include "debug/tensor_load.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
namespace ascend {
|
namespace ascend {
|
||||||
|
@ -346,6 +350,52 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// Copies this device address's memory into a freshly allocated host tensor and
// registers it with the debugger's TensorLoader under `tensor_name`.
// trans_flag == true: translate to host format/type via SyncDeviceToHost;
// trans_flag == false: raw device-to-host rtMemcpy at the native type_id_.
bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order,
                                        const std::string &host_fmt, const std::vector<int> &host_shape,
                                        TypeId host_type, size_t slot, Debugger *debugger) const {
  bool ret = false;

  DebugServices *debug_services = debugger->get_debug_services();
  TensorLoader *tensor_loader = debug_services->get_tensor_loader();

  if (trans_flag) {
    MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
    mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
    size_t host_size = out_tensor->data().nbytes();
    ret = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c(true));
    if (!ret) {
      MS_LOG(ERROR) << "Copy device mem to host failed";
      return ret;
    }
    auto tensor_data = std::make_shared<mindspore::TensorData>();
    tensor_data->SetName(tensor_name);
    tensor_data->SetExecutionOrder(execution_order);
    tensor_data->SetTensor(out_tensor);
    tensor_data->SetSlot(slot);
    ret = tensor_loader->LoadNewTensor(tensor_data);

  } else {
    mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
    size_t host_size = out_tensor->data().nbytes();
    // NOTE(review): host_size is passed as both destination and source size;
    // assumes the device allocation holds at least host_size bytes — confirm.
    auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(true), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST);

    auto tensor_data = std::make_shared<mindspore::TensorData>();
    tensor_data->SetName(tensor_name);
    tensor_data->SetExecutionOrder(execution_order);
    tensor_data->SetTensor(out_tensor);
    tensor_data->SetSlot(slot);
    // NOTE(review): the tensor is registered even when rtMemcpy failed, and
    // `ret` reflects only LoadNewTensor — a memcpy failure is logged below but
    // not propagated to the caller; verify this is intended.
    ret = tensor_loader->LoadNewTensor(tensor_data);
    if (ret_rt_memcpy != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
    }
    MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
  }
  return ret;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace device
|
} // namespace device
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -25,6 +25,9 @@
|
||||||
#include "ir/dtype.h"
|
#include "ir/dtype.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
class Debugger;
|
||||||
|
#endif
|
||||||
namespace device {
|
namespace device {
|
||||||
namespace ascend {
|
namespace ascend {
|
||||||
class AscendDeviceAddress : public DeviceAddress {
|
class AscendDeviceAddress : public DeviceAddress {
|
||||||
|
@ -39,6 +42,10 @@ class AscendDeviceAddress : public DeviceAddress {
|
||||||
#ifdef ENABLE_DUMP_E2E
|
#ifdef ENABLE_DUMP_E2E
|
||||||
bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
|
bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
|
||||||
const std::vector<int> &host_shape, TypeId host_type) const;
|
const std::vector<int> &host_shape, TypeId host_type) const;
|
||||||
|
#endif
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||||
|
const std::vector<int> &host_shape, TypeId host_type, size_t slot, Debugger *debugger) const;
|
||||||
#endif
|
#endif
|
||||||
private:
|
private:
|
||||||
bool SyncDeviceToHostAndConvertFormat(const std::vector<int> &shape, size_t size, TypeId type, void *host_ptr) const;
|
bool SyncDeviceToHostAndConvertFormat(const std::vector<int> &shape, size_t size, TypeId type, void *host_ptr) const;
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
#include "kernel/tbe/tbe_python_funcs.h"
|
#include "kernel/tbe/tbe_python_funcs.h"
|
||||||
#include "pre_activate/mem_reuse/mem_reuse_checker.h"
|
#include "pre_activate/mem_reuse/mem_reuse_checker.h"
|
||||||
#include "device/ascend/ascend_memory_manager.h"
|
#include "device/ascend/ascend_memory_manager.h"
|
||||||
|
#include "debug/tensor_load.h"
|
||||||
|
|
||||||
using mindspore::device::ascend::ProfilingManager;
|
using mindspore::device::ascend::ProfilingManager;
|
||||||
using mindspore::device::ascend::ProfilingUtils;
|
using mindspore::device::ascend::ProfilingUtils;
|
||||||
|
@ -293,6 +294,91 @@ bool AscendKernelRuntime::DumpData(mindspore::session::KernelGraph *graph) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
namespace {
|
||||||
|
// Captures every kernel output of `graph` into the debugger's tensor loader.
// Kernel execution order starts at 1 (0 is reserved for parameters, see
// LoadParameters). trans_flag is currently fixed to false, so tensors are
// loaded in device format/shape without host translation.
void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  bool trans_flag = false;
  const auto &apply_kernels = graph->execution_order();
  // for kernels, execution order starts from 1
  int exec_order = 1;
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = node->fullname_with_scope();
    auto output_size = AnfAlgo::GetOutputTensorNum(node);
    for (size_t j = 0; j < output_size; ++j) {
      auto addr = AnfAlgo::GetOutputAddr(node, j);
      auto type = AnfAlgo::GetOutputInferDataType(node, j);
      auto format = kOpFormat_DEFAULT;
      string tensor_name = kernel_name + ':' + std::to_string(j);
      auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
      // Fix: dynamic_cast yields nullptr when addr is not an Ascend address;
      // the unconditional dereference below would crash. Fail fast instead.
      MS_EXCEPTION_IF_NULL(ascend_addr);
      std::vector<int> int_shapes;
      if (trans_flag) {
        int_shapes = trans::GetRuntimePaddingShape(node, j);
      } else {
        auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
        (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                             [](size_t inner_item) { return SizeToInt(inner_item); });
      }
      auto ret = ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name
                      << ", host_format:" << format << ".!";
      }
    }
    exec_order = exec_order + 1;
  }
}
|
||||||
|
|
||||||
|
// Captures every graph Parameter into the debugger's tensor loader.
// All parameters share execution order 0 and output slot 0; non-Parameter
// graph inputs are skipped.
void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  bool trans_flag = false;
  const auto &parameters = graph->inputs();
  // for parameters, set its execution order to be 0;
  int exec_order = 0;
  for (auto &item : parameters) {
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    string tensor_name = parameter_name + ':' + "0";
    auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
    // Fix: dynamic_cast yields nullptr for non-Ascend addresses; the
    // unconditional dereference below would crash. Fail fast instead.
    MS_EXCEPTION_IF_NULL(ascend_addr);
    std::vector<int> int_shapes;
    if (trans_flag) {
      int_shapes = trans::GetRuntimePaddingShape(item, PRAMATER_OUTPUT_INDEX);
    } else {
      auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
    }
    auto ret = ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost Failed: flag:" << trans_flag << ", path:" << tensor_name
                    << ", host_format:" << format << ".!";
    }
  }
}
|
||||||
|
} // namespace
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Loads all kernel outputs and graph parameters of `graph` into the
// debugger's tensor store. Compiles to a no-op success when the debugger is
// disabled.
bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
#ifdef ENABLE_DEBUGGER
  MS_LOG(INFO) << "start load step";
  // NOTE(review): cur_iter is hardcoded to 0 and only logged — presumably a
  // placeholder until per-iteration tracking is wired in; confirm.
  uint32_t cur_iter = 0;
  MS_LOG(INFO) << "cur iter is " << cur_iter;
  // load output
  LoadOutput(graph, debugger);
  // load parameters
  LoadParameters(graph, debugger);
#endif
  return true;
}
|
||||||
|
|
||||||
bool AscendKernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
|
bool AscendKernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
|
||||||
if (AnfAlgo::OutputAddrExist(kernel, index)) {
|
if (AnfAlgo::OutputAddrExist(kernel, index)) {
|
||||||
auto address = AnfAlgo::GetOutputAddr(kernel, index);
|
auto address = AnfAlgo::GetOutputAddr(kernel, index);
|
||||||
|
|
|
@ -37,6 +37,7 @@ class AscendKernelRuntime : public KernelRuntime {
|
||||||
~AscendKernelRuntime() override;
|
~AscendKernelRuntime() override;
|
||||||
bool Init() override;
|
bool Init() override;
|
||||||
bool DumpData(session::KernelGraph *graph) override;
|
bool DumpData(session::KernelGraph *graph) override;
|
||||||
|
bool LoadData(session::KernelGraph *graph, Debugger *debugger) override;
|
||||||
bool GenTask(const session::KernelGraph *graph) override;
|
bool GenTask(const session::KernelGraph *graph) override;
|
||||||
bool RunTask(const session::KernelGraph *graph) override;
|
bool RunTask(const session::KernelGraph *graph) override;
|
||||||
bool LoadTask(const session::KernelGraph *graph) override;
|
bool LoadTask(const session::KernelGraph *graph) override;
|
||||||
|
|
|
@ -79,6 +79,14 @@ bool KernelRuntime::DumpData(mindspore::session::KernelGraph *graph) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for D to impl
|
||||||
|
// Base implementation of debugger data loading: device back-ends (e.g.
// Ascend) override this; here it only reports whether a graph was supplied.
bool KernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  return graph != nullptr;
}
|
||||||
|
|
||||||
// for D to impl
|
// for D to impl
|
||||||
bool KernelRuntime::GenTask(const session::KernelGraph *graph) {
|
bool KernelRuntime::GenTask(const session::KernelGraph *graph) {
|
||||||
if (graph != nullptr) {
|
if (graph != nullptr) {
|
||||||
|
|
|
@ -27,6 +27,9 @@
|
||||||
#ifdef ENABLE_DUMP_E2E
|
#ifdef ENABLE_DUMP_E2E
|
||||||
#include "debug/e2e_dump.h"
|
#include "debug/e2e_dump.h"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
#include "debug/debugger/debugger.h"
|
||||||
|
#endif
|
||||||
#include "session/kernel_graph.h"
|
#include "session/kernel_graph.h"
|
||||||
#include "session/anf_runtime_algorithm.h"
|
#include "session/anf_runtime_algorithm.h"
|
||||||
#include "kernel/kernel.h"
|
#include "kernel/kernel.h"
|
||||||
|
@ -34,11 +37,15 @@
|
||||||
#include "device/memory_manager.h"
|
#include "device/memory_manager.h"
|
||||||
|
|
||||||
using mindspore::tensor::Tensor;
|
using mindspore::tensor::Tensor;
|
||||||
|
using std::vector;
|
||||||
using TensorPtr = std::shared_ptr<Tensor>;
|
using TensorPtr = std::shared_ptr<Tensor>;
|
||||||
using mindspore::kernel::AddressPtr;
|
using mindspore::kernel::AddressPtr;
|
||||||
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
|
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
#ifndef ENABLE_DEBUGGER
|
||||||
|
class Debugger;
|
||||||
|
#endif
|
||||||
namespace device {
|
namespace device {
|
||||||
class KernelRuntime {
|
class KernelRuntime {
|
||||||
public:
|
public:
|
||||||
|
@ -50,6 +57,7 @@ class KernelRuntime {
|
||||||
void RunOpClearMemory(session::KernelGraph *graph);
|
void RunOpClearMemory(session::KernelGraph *graph);
|
||||||
virtual bool Run(session::KernelGraph *graph);
|
virtual bool Run(session::KernelGraph *graph);
|
||||||
virtual bool DumpData(session::KernelGraph *graph);
|
virtual bool DumpData(session::KernelGraph *graph);
|
||||||
|
virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger);
|
||||||
virtual bool RunTask(const session::KernelGraph *graph);
|
virtual bool RunTask(const session::KernelGraph *graph);
|
||||||
virtual bool GenTask(const session::KernelGraph *graph);
|
virtual bool GenTask(const session::KernelGraph *graph);
|
||||||
bool LaunchKernel(const session::KernelGraph *graph);
|
bool LaunchKernel(const session::KernelGraph *graph);
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "kernel/cpu/debug_cpu_kernel.h"
|
||||||
|
#include "device/cpu/cpu_device_address.h"
|
||||||
|
#include "common/utils.h"
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
#include "debug/debugger/debugger.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
// The Debug kernel needs no per-node initialization; just validate the node.
void DebugCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); }
|
||||||
|
|
||||||
|
// Copies the float input buffer to the int32 output buffer element-by-element
// (values truncate via the implicit float->int conversion), then gives the
// debugger a chance to suspend execution after this op.
bool DebugCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                            const std::vector<kernel::AddressPtr> & /*workspace*/,
                            const std::vector<kernel::AddressPtr> &outputs) {
  if (inputs.size() < 1 || outputs.empty()) {
    MS_LOG(EXCEPTION) << " input or output empty!";
  }
  auto val = reinterpret_cast<float *>(inputs[0]->addr);
  MS_LOG(DEBUG) << " launch DebugCountCPUKernel val " << *val;

  auto output = reinterpret_cast<int *>(outputs[0]->addr);
  // Fix: element count was computed with sizeof(int) although the input
  // buffer holds floats; use sizeof(float) (same value on supported
  // platforms, but now correct by construction).
  // NOTE(review): assumes outputs[0]->size >= inputs[0]->size — confirm the
  // kernel's output allocation guarantees this.
  size_t elem_num = inputs[0]->size / sizeof(float);
  for (size_t i = 0; i < elem_num; i++) {
    output[i] = val[i];
  }

#ifdef ENABLE_DEBUGGER
  // debugger will suspend execution if necessary
  Debugger::GetInstance()->PostDebugOp();
#endif

  return true;
}
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,41 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_
|
||||||
|
#define MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include "kernel/cpu/cpu_kernel.h"
|
||||||
|
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
class DebugCPUKernel : public CPUKernel {
|
||||||
|
public:
|
||||||
|
DebugCPUKernel() = default;
|
||||||
|
~DebugCPUKernel() override = default;
|
||||||
|
|
||||||
|
void InitKernel(const CNodePtr &kernel_node) override;
|
||||||
|
|
||||||
|
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||||
|
const std::vector<AddressPtr> &outputs) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
MS_REG_CPU_KERNEL(Debug, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), DebugCPUKernel);
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_KERNEL_CPU_DEBUG_CPU_KERNEL_H_
|
|
@ -275,5 +275,6 @@ const PrimitivePtr kPrimScalarSummary = std::make_shared<Primitive>("ScalarSumma
|
||||||
const PrimitivePtr kPrimImageSummary = std::make_shared<Primitive>("ImageSummary");
|
const PrimitivePtr kPrimImageSummary = std::make_shared<Primitive>("ImageSummary");
|
||||||
const PrimitivePtr kPrimTensorSummary = std::make_shared<Primitive>("TensorSummary");
|
const PrimitivePtr kPrimTensorSummary = std::make_shared<Primitive>("TensorSummary");
|
||||||
const PrimitivePtr kPrimHistogramSummary = std::make_shared<Primitive>("HistogramSummary");
|
const PrimitivePtr kPrimHistogramSummary = std::make_shared<Primitive>("HistogramSummary");
|
||||||
|
const PrimitivePtr kPrimDebug = std::make_shared<Primitive>("Debug");
|
||||||
} // namespace prim
|
} // namespace prim
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -276,6 +276,7 @@ extern const PrimitivePtr kPrimNotInDict;
|
||||||
extern const PrimitivePtr kPrimMixedPrecisionCast;
|
extern const PrimitivePtr kPrimMixedPrecisionCast;
|
||||||
extern const PrimitivePtr kPrimIsConsant;
|
extern const PrimitivePtr kPrimIsConsant;
|
||||||
extern const PrimitivePtr kPrimEquivFormat;
|
extern const PrimitivePtr kPrimEquivFormat;
|
||||||
|
extern const PrimitivePtr kPrimDebug;
|
||||||
|
|
||||||
// Comm ops
|
// Comm ops
|
||||||
extern const PrimitivePtr kPrimAllReduce;
|
extern const PrimitivePtr kPrimAllReduce;
|
||||||
|
|
|
@ -21,5 +21,21 @@
|
||||||
#include "utils/symbolic.h"
|
#include "utils/symbolic.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace abstract {} // namespace abstract
|
namespace abstract {
|
||||||
|
AbstractBasePtr InferImplDebug(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||||
|
const AbstractBasePtrList &args_spec_list) {
|
||||||
|
// Inputs: a tensor(value)
|
||||||
|
const std::string op_name = primitive->name();
|
||||||
|
|
||||||
|
CheckArgsSize(op_name, args_spec_list, 1);
|
||||||
|
auto tensor_value = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
|
||||||
|
|
||||||
|
int tensor_rank = SizeToInt(tensor_value->shape()->shape().size());
|
||||||
|
if (tensor_rank == 0) {
|
||||||
|
MS_LOG(EXCEPTION) << op_name << " summary evaluator second arg should be an tensor, but got a scalar, rank is 0";
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_shared<AbstractTuple>(AbstractBasePtrList({tensor_value->Broaden()}));
|
||||||
|
}
|
||||||
|
} // namespace abstract
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -66,6 +66,7 @@ const std::set<std::string> BLACK_LIST = {TUPLE_GETITEM,
|
||||||
SCALARSUMMARY,
|
SCALARSUMMARY,
|
||||||
IMAGESUMMARY,
|
IMAGESUMMARY,
|
||||||
TENSORSUMMARY,
|
TENSORSUMMARY,
|
||||||
|
DEBUG,
|
||||||
HISTOGRAMSUMMARY,
|
HISTOGRAMSUMMARY,
|
||||||
COL2IMV1,
|
COL2IMV1,
|
||||||
RESOLVE,
|
RESOLVE,
|
||||||
|
|
|
@ -269,6 +269,7 @@ constexpr char SCALARSUMMARY[] = "ScalarSummary";
|
||||||
constexpr char IMAGESUMMARY[] = "ImageSummary";
|
constexpr char IMAGESUMMARY[] = "ImageSummary";
|
||||||
constexpr char TENSORSUMMARY[] = "TensorSummary";
|
constexpr char TENSORSUMMARY[] = "TensorSummary";
|
||||||
constexpr char HISTOGRAMSUMMARY[] = "HistogramSummary";
|
constexpr char HISTOGRAMSUMMARY[] = "HistogramSummary";
|
||||||
|
constexpr char DEBUG[] = "Debug";
|
||||||
constexpr char BROADCASTGRADIENTARGS[] = "BroadcastGradientArgs";
|
constexpr char BROADCASTGRADIENTARGS[] = "BroadcastGradientArgs";
|
||||||
constexpr char INVERTPERMUTATION[] = "InvertPermutation";
|
constexpr char INVERTPERMUTATION[] = "InvertPermutation";
|
||||||
constexpr char CONTROLDEPEND[] = "ControlDepend";
|
constexpr char CONTROLDEPEND[] = "ControlDepend";
|
||||||
|
|
|
@ -445,7 +445,10 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
|
||||||
std::string backend = MsContext::GetInstance()->backend_policy();
|
std::string backend = MsContext::GetInstance()->backend_policy();
|
||||||
if (use_vm && backend != "ge") {
|
if (use_vm && backend != "ge") {
|
||||||
// Create backend and session
|
// Create backend and session
|
||||||
resource->results()[kBackend] = compile::CreateBackend();
|
auto backend_ptr = compile::CreateBackend();
|
||||||
|
// Connect session to debugger
|
||||||
|
backend_ptr->SetDebugger();
|
||||||
|
resource->results()[kBackend] = backend_ptr;
|
||||||
p_actions = VmPipeline();
|
p_actions = VmPipeline();
|
||||||
} else {
|
} else {
|
||||||
p_actions = GePipeline();
|
p_actions = GePipeline();
|
||||||
|
|
|
@ -130,6 +130,8 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
|
||||||
{prim::kPrimDepend, {InferImplDepend, true}},
|
{prim::kPrimDepend, {InferImplDepend, true}},
|
||||||
{prim::kPrimBroadcastGradientArgs, {InferImplBroadcastGradientArgs, false}},
|
{prim::kPrimBroadcastGradientArgs, {InferImplBroadcastGradientArgs, false}},
|
||||||
{prim::kPrimControlDepend, {InferImplControlDepend, true}},
|
{prim::kPrimControlDepend, {InferImplControlDepend, true}},
|
||||||
|
// Debug
|
||||||
|
{prim::kPrimDebug, {InferImplDebug, true}},
|
||||||
};
|
};
|
||||||
return prim_eval_implement_map;
|
return prim_eval_implement_map;
|
||||||
}
|
}
|
||||||
|
|
|
@ -346,6 +346,9 @@ AbstractBasePtr InferImplBroadcastGradientArgs(const AnalysisEnginePtr &, const
|
||||||
const AbstractBasePtrList &args_spec_list);
|
const AbstractBasePtrList &args_spec_list);
|
||||||
AbstractBasePtr InferImplControlDepend(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
AbstractBasePtr InferImplControlDepend(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||||
const AbstractBasePtrList &args_spec_list);
|
const AbstractBasePtrList &args_spec_list);
|
||||||
|
|
||||||
|
AbstractBasePtr InferImplDebug(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||||
|
const AbstractBasePtrList &args_spec_list);
|
||||||
} // namespace abstract
|
} // namespace abstract
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include "operator/ops.h"
|
#include "operator/ops.h"
|
||||||
#include "ir/tensor.h"
|
#include "ir/tensor.h"
|
||||||
|
@ -45,6 +46,7 @@
|
||||||
#include "kernel/tbe/tbe_python_funcs.h"
|
#include "kernel/tbe/tbe_python_funcs.h"
|
||||||
#include "utils/config_manager.h"
|
#include "utils/config_manager.h"
|
||||||
#include "utils/base_ref_extends.h"
|
#include "utils/base_ref_extends.h"
|
||||||
|
#include "debug/tensor_load.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace session {
|
namespace session {
|
||||||
|
@ -450,6 +452,12 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
|
||||||
LoadInputData(kernel_graph, inputs);
|
LoadInputData(kernel_graph, inputs);
|
||||||
// convert inputs to model
|
// convert inputs to model
|
||||||
predictmodel::StepConvertWeight(inputs);
|
predictmodel::StepConvertWeight(inputs);
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// debugger pre-execution processing
|
||||||
|
if (debugger_) {
|
||||||
|
debugger_->PreExecute(kernel_graph);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
py::gil_scoped_release release;
|
py::gil_scoped_release release;
|
||||||
// run task on device
|
// run task on device
|
||||||
|
@ -459,8 +467,20 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
|
||||||
UpdateOutputs(kernel_graph, outputs, inputs);
|
UpdateOutputs(kernel_graph, outputs, inputs);
|
||||||
// summary
|
// summary
|
||||||
Summary(kernel_graph.get());
|
Summary(kernel_graph.get());
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// load tensor from device for debugger
|
||||||
|
if (debugger_ && debugger_->debugger_enabled()) {
|
||||||
|
LoadTensor(kernel_graph);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
// dump used for debug
|
// dump used for debug
|
||||||
Dump(kernel_graph);
|
Dump(kernel_graph);
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// debugger post-execution processing
|
||||||
|
if (debugger_) {
|
||||||
|
debugger_->PostExecute();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
MS_LOG(INFO) << "Finish!";
|
MS_LOG(INFO) << "Finish!";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -757,6 +777,22 @@ void AscendSession::ExportChildGraphs(const GraphId graph_id) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||||
|
MS_LOG(INFO) << "Start!";
|
||||||
|
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
|
||||||
|
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||||
|
DebugServices *debug_services = debugger_->get_debug_services();
|
||||||
|
TensorLoader *tensor_loader = debug_services->get_tensor_loader();
|
||||||
|
tensor_loader->EmptyTensor();
|
||||||
|
uint32_t iter_num = tensor_loader->GetIterNum();
|
||||||
|
tensor_loader->set_iter_num(++iter_num);
|
||||||
|
(void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get());
|
||||||
|
#endif
|
||||||
|
MS_LOG(INFO) << "Finish!";
|
||||||
|
}
|
||||||
|
|
||||||
GraphId AscendSession::SetFinalGraphInput(const std::vector<AnfNodePtr> &args) {
|
GraphId AscendSession::SetFinalGraphInput(const std::vector<AnfNodePtr> &args) {
|
||||||
MS_LOG(INFO) << "Start! Args size " << args.size();
|
MS_LOG(INFO) << "Start! Args size " << args.size();
|
||||||
auto final_graph = NewKernelGraph();
|
auto final_graph = NewKernelGraph();
|
||||||
|
|
|
@ -87,6 +87,7 @@ class AscendSession : public SessionBasic {
|
||||||
void ExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
void ExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||||
void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||||
void ExportChildGraphs(const GraphId graph_id);
|
void ExportChildGraphs(const GraphId graph_id);
|
||||||
|
void LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||||
// below functions are used for run op
|
// below functions are used for run op
|
||||||
void RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const;
|
void RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const;
|
||||||
void RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
void RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const;
|
||||||
|
|
|
@ -25,6 +25,9 @@
|
||||||
#include "predict/predict.h"
|
#include "predict/predict.h"
|
||||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||||
#include "device/cpu/kernel_select_cpu.h"
|
#include "device/cpu/kernel_select_cpu.h"
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
#include "debug/debugger/debugger.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace session {
|
namespace session {
|
||||||
|
@ -78,7 +81,12 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
|
||||||
summary_outputs = kernel_graph->summary_nodes();
|
summary_outputs = kernel_graph->summary_nodes();
|
||||||
runtime_.IncreaseSummaryRefCount(summary_outputs);
|
runtime_.IncreaseSummaryRefCount(summary_outputs);
|
||||||
}
|
}
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// debugger pre-execution processing
|
||||||
|
if (debugger_) {
|
||||||
|
debugger_->PreExecute(kernel_graph);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
bool ret = runtime_.Run(kernel_graph.get());
|
bool ret = runtime_.Run(kernel_graph.get());
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
MS_LOG(EXCEPTION) << "Run graph failed";
|
MS_LOG(EXCEPTION) << "Run graph failed";
|
||||||
|
@ -92,6 +100,12 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
|
||||||
runtime_.DecreaseSummaryRefCount(summary_outputs);
|
runtime_.DecreaseSummaryRefCount(summary_outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// debugger post-execution processing
|
||||||
|
if (debugger_) {
|
||||||
|
debugger_->PostExecute();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
MS_LOG(INFO) << "Run graph end";
|
MS_LOG(INFO) << "Run graph end";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,9 @@
|
||||||
#include "utils/contract.h"
|
#include "utils/contract.h"
|
||||||
#include "pynative/pynative_execute.h"
|
#include "pynative/pynative_execute.h"
|
||||||
#include "device/kernel_info.h"
|
#include "device/kernel_info.h"
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
#include "debug/debugger/debugger.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
using GraphId = uint32_t;
|
using GraphId = uint32_t;
|
||||||
|
@ -48,7 +51,11 @@ using OpRunInfoPtr = std::shared_ptr<OpRunInfo>;
|
||||||
|
|
||||||
class SessionBasic {
|
class SessionBasic {
|
||||||
public:
|
public:
|
||||||
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {}
|
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
debugger_ = nullptr;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
virtual void Init(uint32_t device_id) { device_id_ = device_id; }
|
virtual void Init(uint32_t device_id) { device_id_ = device_id; }
|
||||||
|
|
||||||
|
@ -92,6 +99,14 @@ class SessionBasic {
|
||||||
virtual void SetActive(GraphId, GraphId) {}
|
virtual void SetActive(GraphId, GraphId) {}
|
||||||
virtual void GetSummaryNodes(KernelGraph *graph);
|
virtual void GetSummaryNodes(KernelGraph *graph);
|
||||||
|
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
// set debugger
|
||||||
|
void SetDebugger() {
|
||||||
|
debugger_ = Debugger::GetInstance();
|
||||||
|
debugger_->Init(device_id_);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
virtual void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||||
const std::vector<tensor::TensorPtr> &inputs_const) const;
|
const std::vector<tensor::TensorPtr> &inputs_const) const;
|
||||||
|
@ -123,6 +138,9 @@ class SessionBasic {
|
||||||
CallBackFunc summary_callback_;
|
CallBackFunc summary_callback_;
|
||||||
static GraphId graph_sum_;
|
static GraphId graph_sum_;
|
||||||
uint32_t device_id_;
|
uint32_t device_id_;
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
std::shared_ptr<Debugger> debugger_;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
using SessionPtr = std::shared_ptr<session::SessionBasic>;
|
using SessionPtr = std::shared_ptr<session::SessionBasic>;
|
||||||
|
|
|
@ -371,6 +371,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
|
||||||
{prim::kPrimImageSummary->name(), ADPT_DESC(Summary)},
|
{prim::kPrimImageSummary->name(), ADPT_DESC(Summary)},
|
||||||
{prim::kPrimTensorSummary->name(), ADPT_DESC(Summary)},
|
{prim::kPrimTensorSummary->name(), ADPT_DESC(Summary)},
|
||||||
{prim::kPrimHistogramSummary->name(), ADPT_DESC(Summary)},
|
{prim::kPrimHistogramSummary->name(), ADPT_DESC(Summary)},
|
||||||
|
{prim::kPrimDebug->name(), ADPT_DESC(Summary)},
|
||||||
{prim::kPrimTensorAdd->name(),
|
{prim::kPrimTensorAdd->name(),
|
||||||
std::make_shared<OpAdapterDesc>(std::make_shared<OpAdapter<Add>>(ExtraAttr({{"mode", MakeValue(1)}})),
|
std::make_shared<OpAdapterDesc>(std::make_shared<OpAdapter<Add>>(ExtraAttr({{"mode", MakeValue(1)}})),
|
||||||
std::make_shared<OpAdapter<Add>>(ExtraAttr({{"mode", MakeValue(1)}})))},
|
std::make_shared<OpAdapter<Add>>(ExtraAttr({{"mode", MakeValue(1)}})))},
|
||||||
|
|
|
@ -69,7 +69,11 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
|
||||||
enable_task_sink_ = true;
|
enable_task_sink_ = true;
|
||||||
ir_fusion_flag_ = true;
|
ir_fusion_flag_ = true;
|
||||||
enable_hccl_ = false;
|
enable_hccl_ = false;
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
enable_mem_reuse_ = false;
|
||||||
|
#else
|
||||||
enable_mem_reuse_ = true;
|
enable_mem_reuse_ = true;
|
||||||
|
#endif
|
||||||
enable_gpu_summary_ = true;
|
enable_gpu_summary_ = true;
|
||||||
precompile_only_ = false;
|
precompile_only_ = false;
|
||||||
auto_mixed_precision_flag_ = false;
|
auto_mixed_precision_flag_ = false;
|
||||||
|
|
|
@ -362,5 +362,9 @@ GraphId MsBackend::CompileGraph(NotNull<FuncGraphPtr> fg) { return target_sess_-
|
||||||
|
|
||||||
VectorRef MsBackend::RunGraph(GraphId graph_id, const VectorRef &args) { return MsRunGraph(graph_id, args); }
|
VectorRef MsBackend::RunGraph(GraphId graph_id, const VectorRef &args) { return MsRunGraph(graph_id, args); }
|
||||||
|
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
void MsBackend::SetDebugger() { target_sess_->SetDebugger(); }
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace compile
|
} // namespace compile
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -69,6 +69,8 @@ class Backend {
|
||||||
bool is_switch_call() const { return is_switch_call_; }
|
bool is_switch_call() const { return is_switch_call_; }
|
||||||
void set_simu_flag(bool simu) { simu_flag_ = simu; }
|
void set_simu_flag(bool simu) { simu_flag_ = simu; }
|
||||||
|
|
||||||
|
virtual void SetDebugger() {}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::string name_;
|
std::string name_;
|
||||||
LinkFuncType convert_fn_;
|
LinkFuncType convert_fn_;
|
||||||
|
@ -109,6 +111,10 @@ class MsBackend : public Backend {
|
||||||
VectorRef RunGraph(GraphId graph_id, const VectorRef &args);
|
VectorRef RunGraph(GraphId graph_id, const VectorRef &args);
|
||||||
void CreateOtherSession(const std::string &target);
|
void CreateOtherSession(const std::string &target);
|
||||||
|
|
||||||
|
#ifdef ENABLE_DEBUGGER
|
||||||
|
void SetDebugger() override;
|
||||||
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
session::SessionPtr target_sess_;
|
session::SessionPtr target_sess_;
|
||||||
session::SessionPtr other_sess_;
|
session::SessionPtr other_sess_;
|
||||||
|
|
|
@ -66,3 +66,12 @@ def get_bprop_insert_gradient_of(self):
|
||||||
def bprop(x, out, dout):
|
def bprop(x, out, dout):
|
||||||
return (f(dout),)
|
return (f(dout),)
|
||||||
return bprop
|
return bprop
|
||||||
|
|
||||||
|
|
||||||
|
@bprop_getters.register(P.Debug)
|
||||||
|
def get_bprop_debug(self):
|
||||||
|
"""Generate bprop for Debug"""
|
||||||
|
|
||||||
|
def bprop(x, out, dout):
|
||||||
|
return dout
|
||||||
|
return bprop
|
||||||
|
|
|
@ -37,7 +37,7 @@ from .comm_ops import (AllGather, AllReduce, _AlltoAll, ReduceScatter, Broadcast
|
||||||
_VirtualDiv, _GetTensorSlice,
|
_VirtualDiv, _GetTensorSlice,
|
||||||
HostAllGather, HostReduceScatter)
|
HostAllGather, HostReduceScatter)
|
||||||
from .debug_ops import (ImageSummary, InsertGradientOf, HookBackward, ScalarSummary,
|
from .debug_ops import (ImageSummary, InsertGradientOf, HookBackward, ScalarSummary,
|
||||||
TensorSummary, HistogramSummary, Print)
|
TensorSummary, HistogramSummary, Debug, Print)
|
||||||
from .control_ops import ControlDepend, GeSwitch, Merge
|
from .control_ops import ControlDepend, GeSwitch, Merge
|
||||||
from .inner_ops import ScalarCast
|
from .inner_ops import ScalarCast
|
||||||
|
|
||||||
|
@ -173,6 +173,7 @@ __all__ = [
|
||||||
'ImageSummary',
|
'ImageSummary',
|
||||||
'TensorSummary',
|
'TensorSummary',
|
||||||
'HistogramSummary',
|
'HistogramSummary',
|
||||||
|
"Debug",
|
||||||
"Print",
|
"Print",
|
||||||
'InsertGradientOf',
|
'InsertGradientOf',
|
||||||
'HookBackward',
|
'HookBackward',
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
from types import FunctionType, MethodType
|
from types import FunctionType, MethodType
|
||||||
from ..._checkparam import Validator as validator
|
from ..._checkparam import Validator as validator
|
||||||
from ...common import dtype as mstype
|
from ...common import dtype as mstype
|
||||||
from ..primitive import prim_attr_register, PrimitiveWithInfer
|
from ..primitive import prim_attr_register, PrimitiveWithInfer, Primitive
|
||||||
|
|
||||||
|
|
||||||
def _check_summary_param(name, value, class_name):
|
def _check_summary_param(name, value, class_name):
|
||||||
|
@ -340,3 +340,29 @@ class Print(PrimitiveWithInfer):
|
||||||
for dtype in inputs:
|
for dtype in inputs:
|
||||||
validator.check_subclass("input", dtype, (mstype.tensor, mstype.string), self.name)
|
validator.check_subclass("input", dtype, (mstype.tensor, mstype.string), self.name)
|
||||||
return mstype.int32
|
return mstype.int32
|
||||||
|
|
||||||
|
|
||||||
|
class Debug(Primitive):
|
||||||
|
"""
|
||||||
|
Print tensor value.
|
||||||
|
|
||||||
|
Inputs:
|
||||||
|
- **value** (Tensor) - The value of tensor.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> class DebugNN(nn.Cell):
|
||||||
|
>>> def __init__(self,):
|
||||||
|
>>> self.debug = nn.Debug()
|
||||||
|
>>>
|
||||||
|
>>> def construct(self, x, y):
|
||||||
|
>>> x = self.add(x, y)
|
||||||
|
>>> self.debug(x)
|
||||||
|
>>> return x
|
||||||
|
"""
|
||||||
|
|
||||||
|
@prim_attr_register
|
||||||
|
def __init__(self):
|
||||||
|
"""init"""
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
pass
|
||||||
|
|
|
@ -114,6 +114,12 @@ list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strateg
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_converter.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_converter.cc")
|
||||||
|
|
||||||
|
# remove files for debugger
|
||||||
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/debugger/debugger.cc")
|
||||||
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/debugger/grpc_client.cc")
|
||||||
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/debug_services.cc")
|
||||||
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/debugger/proto_exporter.cc")
|
||||||
|
|
||||||
file(GLOB_RECURSE UT_SUTB_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
file(GLOB_RECURSE UT_SUTB_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
"stub/aicpu/*.cc"
|
"stub/aicpu/*.cc"
|
||||||
"stub/cce/*.cc"
|
"stub/cce/*.cc"
|
||||||
|
|
Loading…
Reference in New Issue