add security isolation to online and offline debugger

sabrinasun 2021-09-12 23:01:15 -04:00
parent 25f8212a91
commit 220245f592
30 changed files with 177 additions and 31 deletions
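
Every hunk below applies one of two compile-time guards: ENABLE_SECURITY (defined when MindSpore is built with `-s on`) strips data-dump and memory-profiling paths, while ENABLE_DEBUGGER compiles the online and offline debugger hooks in or out. A minimal sketch of the pattern; the function names are illustrative stand-ins, not MindSpore APIs:

// Illustrative sketch only. SaveDumpData() and RunDebugHooks() are
// hypothetical stand-ins for the dump and debugger paths guarded below.
void SaveDumpData();   // compiled only into non-security builds
void RunDebugHooks();  // compiled only into debugger builds

void RunStep() {
#ifndef ENABLE_SECURITY
  SaveDumpData();      // absent entirely from a '-s on' build
#endif
#ifdef ENABLE_DEBUGGER
  RunDebugHooks();     // absent unless the debugger is compiled in
#endif
}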

View File

@@ -55,11 +55,13 @@ install(
COMPONENT mindspore
)
install(
TARGETS _mindspore_offline_debug
DESTINATION ${INSTALL_BASE_DIR}
COMPONENT mindspore
)
if(ENABLE_DEBUGGER)
install(
TARGETS _mindspore_offline_debug
DESTINATION ${INSTALL_BASE_DIR}
COMPONENT mindspore
)
endif()
install(
TARGETS mindspore_shared_lib

View File

@@ -56,15 +56,15 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "utils/config_manager.h"
#include "debug/data_dump/dump_json_parser.h"
#include "debug/tensor_load.h"
#include "debug/data_dump/e2e_dump.h"
#include "debug/anf_ir_utils.h"
#include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
#include "backend/session/ascend_auto_monad.h"
#include "debug/data_dump/e2e_dump.h"
#include "debug/anf_ir_dump.h"
#include "debug/dump_proto.h"
#include "abstract/utils.h"
#ifdef ENABLE_DEBUGGER
#include "debug/tensor_load.h"
#include "debug/debugger/proto_exporter.h"
#else
#include "debug/debugger/proto_exporter_stub.h"
@@ -520,10 +520,12 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
HardwareOptimize(NOT_NULL(root_graph), NOT_NULL(&memo));
memo.clear();
#ifdef ENABLE_DEBUGGER
// load graphs to debugger.
if (debugger_ && debugger_->DebuggerBackendEnabled()) {
LoadGraphsToDbg(NOT_NULL(root_graph), NOT_NULL(&memo));
}
#endif
memo.clear();
UpdateRefOutputMap(NOT_NULL(root_graph), NOT_NULL(&memo));
memo.clear();
@@ -553,9 +555,11 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
device::KernelAdjust::GetInstance().InsertOverflowCheckOperations(NOT_NULL(root_graph));
// build kernel
BuildKernel(root_graph);
#ifdef ENABLE_DEBUGGER
if (debugger_ && debugger_->partial_memory()) {
debugger_->PreExecute(root_graph);
}
#endif
SetSummaryNodes(root_graph.get());
// Alloc memory for child graph's inputs
AssignStaticMemory(NOT_NULL(root_graph), NOT_NULL(&memo));
@@ -568,6 +572,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
root_graph->SetOptimizerFlag();
DumpAllGraphs(all_graphs);
// Save memory profiling data to proto file
#ifndef ENABLE_SECURITY
auto profiling_instance = MemoryProfiling::GetInstance();
if (profiling_instance.IsMemoryProfilingEnable()) {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
@@ -576,6 +581,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
profiling_instance.SetDeviceMemSize(mem_size);
profiling_instance.SaveMemoryProfiling();
}
#endif
// return the root_graph id to backend
auto graph_id = root_graph->graph_id();
return graph_id;
@@ -628,9 +634,11 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
BuildKernel(graph);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
#ifdef ENABLE_DEBUGGER
if (debugger_ && debugger_->partial_memory()) {
debugger_->PreExecute(graph);
}
#endif
if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
MS_LOG(INFO) << "Precompile only, stop in build kernel step";
} else {
@@ -677,9 +685,11 @@ bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedInser
void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const) {
#ifdef ENABLE_DEBUGGER
if (debugger_) {
debugger_->PreExecute(kernel_graph);
}
#endif
#if ENABLE_CPU && ENABLE_D
// Initialize parameter server
InitPSParamAndOptim(kernel_graph, inputs);
@@ -694,6 +704,7 @@ void AscendSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_
const std::vector<tensor::TensorPtr> &, VectorRef *const) {
// summary
Summary(kernel_graph.get());
#ifdef ENABLE_DEBUGGER
// load tensor from device for debugger
if (debugger_ && debugger_->debugger_enabled()) {
LoadTensor(kernel_graph);
@@ -702,6 +713,7 @@ void AscendSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_
if (debugger_) {
debugger_->PostExecute();
}
#endif
}
void AscendSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) { Execute(kernel_graph, true); }
@@ -1759,6 +1771,7 @@ void AscendSession::HardwareOptimize(NotNull<KernelGraphPtr> graph,
MS_LOG(INFO) << "Finish doing HardwareOptimize in graph: " << graph->graph_id();
}
#ifdef ENABLE_DEBUGGER
void AscendSession::LoadGraphsToDbg(NotNull<KernelGraphPtr> graph,
NotNull<std::set<KernelGraphPtr> *> const memo) const {
if (memo->find(graph) != memo->end()) {
@@ -1775,6 +1788,7 @@ void AscendSession::LoadGraphsToDbg(NotNull<KernelGraphPtr> graph,
}
MS_LOG(INFO) << "Finish doing LoadGraphsToDbg in graph: " << graph->graph_id();
}
#endif
void AscendSession::AssignStaticMemory(NotNull<KernelGraphPtr> graph,
NotNull<std::set<KernelGraphPtr> *> const memo) const {
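
Note that LoadGraphsToDbg is fenced at its definition and at its call site with the same macro; guarding only one of the two would break either the debugger build or the plain build. A reduced sketch of that shape, with class and method names abbreviated:

// Sketch: definition and every call site carry the same guard.
class Session {
 public:
  void Compile() {
#ifdef ENABLE_DEBUGGER
    LoadGraphsToDbg();  // call site guarded exactly like the definition
#endif
  }

 private:
#ifdef ENABLE_DEBUGGER
  void LoadGraphsToDbg() { /* hand graphs to the debugger */ }
#endif
};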

View File

@@ -133,7 +133,9 @@ class AscendSession : public SessionBasic {
size_t *const raise_precision_count, size_t *const reduce_precision_count) const;
void IrFusionPass(const NotNull<KernelGraphPtr> graph, NotNull<std::set<KernelGraphPtr> *> memo);
void HardwareOptimize(const NotNull<KernelGraphPtr> graph, NotNull<std::set<KernelGraphPtr> *> memo) const;
#ifdef ENABLE_DEBUGGER
void LoadGraphsToDbg(const NotNull<KernelGraphPtr> graph, NotNull<std::set<KernelGraphPtr> *> memo) const;
#endif
void AssignStaticMemory(const NotNull<KernelGraphPtr> graph, NotNull<std::set<KernelGraphPtr> *> memo) const;
void UpdateRefOutputMap(const NotNull<KernelGraphPtr> graph, NotNull<std::set<KernelGraphPtr> *> memo) const;
void CacheCNodeOutputInfo(const KernelGraph &graph) const;

View File

@@ -57,16 +57,16 @@
#include "backend/optimizer/pass/optimize_updatestate.h"
#include "common/trans.h"
#include "debug/anf_ir_dump.h"
#include "debug/data_dump/e2e_dump.h"
#include "debug/dump_proto.h"
#ifdef ENABLE_DEBUGGER
#include "debug/data_dump/e2e_dump.h"
#include "debug/data_dump/dump_json_parser.h"
#include "debug/debugger/proto_exporter.h"
#include "debug/data_dump/dump_utils.h"
#include "debug/tensor_load.h"
#else
#include "debug/debugger/proto_exporter_stub.h"
#endif
#include "debug/data_dump/dump_json_parser.h"
#include "debug/data_dump/dump_utils.h"
#include "debug/tensor_load.h"
#include "debug/dump_proto.h"
#include "runtime/device/gpu/gpu_kernel_build.h"
#include "runtime/device/gpu/gpu_kernel_runtime.h"
#include "runtime/device/gpu/gpu_stream_assign.h"
@@ -123,11 +123,12 @@ void GPUSession::Init(uint32_t device_id) {
rank_id_ = GetRankId();
}
}
#ifndef ENABLE_SECURITY
auto &json_parser = DumpJsonParser::GetInstance();
// Dump json config file if dump is enabled
json_parser.CopyJsonToDir(rank_id_);
json_parser.CopyMSCfgJsonToDir(rank_id_);
#endif
MS_LOG(INFO) << "Set device id " << device_id << " for gpu session.";
InitExecutor(kGPUDevice, device_id);
}
@@ -403,8 +404,10 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
#ifndef ENABLE_SECURITY
auto &json_parser = DumpJsonParser::GetInstance();
json_parser.Parse();
#endif
// Dump .pb graph before graph optimization
if (save_graphs) {
DumpIRProto(graph, "before_opt_" + std::to_string(graph->graph_id()));
@@ -454,6 +457,7 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
if (save_graphs) {
DumpIRProto(graph, "after_opt_" + std::to_string(graph->graph_id()));
}
#ifndef ENABLE_SECURITY
if (json_parser.e2e_dump_enabled()) {
graph->set_root_graph_id(graph->graph_id());
std::string final_graph = "trace_code_graph_" + std::to_string(graph->graph_id());
@@ -465,6 +469,7 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
DumpGraphExeOrder("ms_execution_order_graph_" + std::to_string(graph->graph_id()) + ".csv", root_dir,
graph->execution_order());
}
#endif
// Set graph manager.
MS_EXCEPTION_IF_NULL(context_);
FuncGraphManagerPtr manager = MakeManager({graph});
@@ -493,11 +498,13 @@ GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
#ifdef ENABLE_DEBUGGER
if (debugger_) {
debugger_->PreExecute(kernel_graph);
}
DumpSetup(kernel_graph);
#endif
#if ENABLE_CPU && ENABLE_GPU
// Initialize parameter server
@@ -513,7 +520,7 @@ void GPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_GPU_SUMMARY)) {
Summary(kernel_graph.get());
}
#ifdef ENABLE_DEBUGGER
if (debugger_ && debugger_->DebuggerBackendEnabled()) {
debugger_->LoadParametersAndConst(kernel_graph);
}
@@ -526,6 +533,7 @@ void GPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra
if (debugger_) {
debugger_->PostExecute();
}
#endif
}
void GPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) {
@@ -683,6 +691,7 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
}
}
#ifdef ENABLE_DEBUGGER
void GPUSession::DumpSetup(const std::shared_ptr<KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "Start!";
MS_EXCEPTION_IF_NULL(kernel_graph);
@@ -704,6 +713,7 @@ bool GPUSession::DumpDataEnabledIteration() const {
MS_EXCEPTION_IF_NULL(runtime_instance);
return runtime_instance->DumpDataEnabledIteration();
}
#endif
void GPUSession::SyncStream() const {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
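
GPUSession mixes the two guards: #ifndef ENABLE_SECURITY fences the dump-json machinery while #ifdef ENABLE_DEBUGGER fences the debugger hooks, so a security build loses dumping even when the debugger flag is on. A hedged sketch of how the two compose; the helper names are placeholders, not the real DumpJsonParser or Debugger calls:

// Placeholder helpers standing in for the dump and debugger paths.
void ParseDumpConfig();
void BuildKernels();
void NotifyDebugger();

void CompileGraph() {
#ifndef ENABLE_SECURITY
  ParseDumpConfig();   // dump config exists only when security mode is off
#endif
  BuildKernels();
#ifdef ENABLE_DEBUGGER
  NotifyDebugger();    // debugger hook, independent of the dump guard
#endif
}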

View File

@@ -90,11 +90,13 @@ class GPUSession : public SessionBasic {
void Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const;
#ifdef ENABLE_DEBUGGER
void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void DumpSetup(const std::shared_ptr<KernelGraph> &kernel_graph) const;
bool DumpDataEnabledIteration() const;
#endif
GraphId CompileGraphImpl(KernelGraphPtr kernel_graph);
};

View File

@@ -33,7 +33,7 @@
#include "runtime/device/kernel_info.h"
#include "utils/ms_context.h"
#include "runtime/device/bucket.h"
#if !defined(_WIN32) && !defined(_WIN64)
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
#include "debug/debugger/debugger.h"
#endif
#include "runtime/hardware/device_context.h"
@@ -93,7 +93,7 @@ class Executor;
class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
public:
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {
#if !defined(_WIN32) && !defined(_WIN64)
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
debugger_ = nullptr;
#endif
}
@@ -319,7 +319,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
// rank id of physical device
uint32_t rank_id_{0};
std::shared_ptr<Executor> executor_;
#if !defined(_WIN32) && !defined(_WIN64)
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
std::shared_ptr<Debugger> debugger_;
#endif
};
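
Since debugger_ now exists only when ENABLE_DEBUGGER is defined (and never on Windows), every touch point, including the constructor initialization above, must sit under the identical condition. A sketch of the hazard this avoids:

// Sketch: a conditionally declared member forces the same guard on all uses.
#include <memory>

class Debugger;  // assumed debugger type

class SessionBase {
 public:
  SessionBase() {
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
    debugger_ = nullptr;  // touching debugger_ outside this guard breaks
#endif                    // non-debugger and Windows builds
  }

 protected:
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
  std::shared_ptr<Debugger> debugger_;
#endif
};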

View File

@@ -60,13 +60,15 @@ endif()
set_property(SOURCE ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_DEBUG)
add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST} ${_RDR_SRC_LIST})
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
add_compile_options(-Wall -DOFFLINE_DBG_MODE -fPIC -O2)
set_property(SOURCE ${_OFFLINE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_OFFLINE_DEBUG)
add_library(_mindspore_offline_debug SHARED ${_OFFLINE_SRC_LIST})
set_target_properties(_mindspore_offline_debug PROPERTIES
PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
if(ENABLE_DEBUGGER)
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
add_compile_options(-Wall -DOFFLINE_DBG_MODE -fPIC -O2)
set_property(SOURCE ${_OFFLINE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_OFFLINE_DEBUG)
add_library(_mindspore_offline_debug SHARED ${_OFFLINE_SRC_LIST})
set_target_properties(_mindspore_offline_debug PROPERTIES
PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
endif()
endif()

View File

@@ -103,7 +103,9 @@ const char IR_TYPE_MINDIR[] = "mind_ir";
GraphExecutorPyPtr GraphExecutorPy::executor_ = nullptr;
std::mutex GraphExecutorPy::instance_lock_;
#ifdef ENABLE_DEBUGGER
bool GraphExecutorPy::debugger_terminate_ = false;
#endif
std::unordered_map<abstract::AbstractBasePtrList, uint64_t, abstract::AbstractBasePtrListHasher,
abstract::AbstractBasePtrListEqual>
@@ -683,8 +685,10 @@ std::vector<ActionItem> GetPipeline(const ResourcePtr &resource, const std::stri
compile::SetMindRTEnable();
// Create backend.
auto backend_ptr = compile::CreateBackend();
#ifdef ENABLE_DEBUGGER
// Connect session to debugger
backend_ptr->SetDebugger();
#endif
resource->results()[kBackend] = backend_ptr;
// If the 'use_frontend_compile_cache' context has been set true and the cache is read successfully,
// do the backend actions only.
@@ -990,6 +994,7 @@ void GraphExecutorPy::ProcessVmArg(const py::tuple &args, const std::string &pha
ProcessVmArgInner(args, GetResource(phase), arg_list);
}
#ifdef ENABLE_DEBUGGER
void GraphExecutorPy::TerminateDebugger() {
if (debugger_terminate_) {
MS_LOG(INFO) << "Terminate debugger and clear resources!";
@@ -997,10 +1002,13 @@ void GraphExecutorPy::TerminateDebugger() {
exit(1);
}
}
#endif
py::object GraphExecutorPy::Run(const py::tuple &args, const py::object &phase_obj) {
// The MindSpore debugger notifies the main thread to exit after one step and does not run the next step
#ifdef ENABLE_DEBUGGER
TerminateDebugger();
#endif
std::size_t size = args.size();
if (!py::isinstance<py::str>(phase_obj)) {
MS_LOG(EXCEPTION) << "Run failed, phase input is not a str";
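
debugger_terminate_ is a static member, so the guard has to agree in three places: the in-class declaration, the out-of-class definition, and every accessor or call site. A sketch with illustrative names:

// Sketch: a static flag compiled out together with everything that reads it.
class Executor {
 public:
#ifdef ENABLE_DEBUGGER
  static void RequestTerminate() { terminate_ = true; }
  static bool TerminateRequested() { return terminate_; }
#endif

 private:
#ifdef ENABLE_DEBUGGER
  static bool terminate_;
#endif
};

#ifdef ENABLE_DEBUGGER
// The out-of-class definition needs the same guard as the declaration.
bool Executor::terminate_ = false;
#endif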

View File

@@ -107,9 +107,11 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
void DelNetRes(const std::string &id);
void ReleaseResource(const py::object &phase_obj);
static void ClearRes();
#ifdef ENABLE_DEBUGGER
static bool GetDebugTerminate() { return debugger_terminate_; }
static void DebugTerminate(bool val) { debugger_terminate_ = val; }
void TerminateDebugger();
#endif
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> FetchInfoForQuantExport(
const std::string &phase);
@@ -127,7 +129,9 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
std::map<std::string, ExecutorInfoPtr> info_;
static std::shared_ptr<GraphExecutorPy> executor_;
static std::mutex instance_lock_;
#ifdef ENABLE_DEBUGGER
static bool debugger_terminate_;
#endif
std::map<std::string, py::dict> stra_dict_;
std::string phase_ = "";
std::map<std::string, size_t> phase_to_num_op_info_;

View File

@@ -358,8 +358,9 @@ bool AscendKernelRuntime::Init() {
if (!ret) {
return ret;
}
#ifdef ENABLE_DEBUGGER
SetDebugger();
#endif
mem_manager_ = std::make_shared<AscendMemoryManager>();
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocDeviceMemory();
@@ -583,7 +584,6 @@ void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph
MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
}
}
#endif
void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
@@ -600,6 +600,7 @@ void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
MS_LOG(EXCEPTION) << "GraphId:" << graph_id << " not found";
}
}
#endif
void AscendKernelRuntime::TaskFailCallback(rtExceptionInfo *task_fail_info) {
MS_EXCEPTION_IF_NULL(task_fail_info);

View File

@@ -49,8 +49,10 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
MS_EXCEPTION_IF_NULL(context_ptr);
bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
if (save_graphs) {
#ifndef ENABLE_SECURITY
std::string file_path = GetSaveGraphsPathName("task_info_graph_" + std::to_string(graph_id) + ".ir");
DumpTaskInfo(file_path);
#endif
}
return true;
}
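
With the guard placed inside the branch, a security build compiles this down to an empty if (save_graphs) {} body, which is legal but easy to trip warnings on. An alternative placement that fences the whole branch, sketched with the surrounding helpers assumed rather than quoted:

#include <string>

// Assumed helpers mirroring the ones used above.
std::string GetSaveGraphsPathName(const std::string &file_name);
void DumpTaskInfo(const std::string &file_path);

void MaybeDumpTaskInfo(bool save_graphs, unsigned graph_id) {
#ifndef ENABLE_SECURITY
  if (save_graphs) {
    std::string file_path =
        GetSaveGraphsPathName("task_info_graph_" + std::to_string(graph_id) + ".ir");
    DumpTaskInfo(file_path);
  }
#else
  (void)save_graphs;  // keep security builds warning-free
  (void)graph_id;
#endif
}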

View File

@@ -127,6 +127,7 @@ void GPUDeviceAddress::ClearDeviceMemory() {
GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); }
#ifdef ENABLE_DEBUGGER
bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot,
bool keep_prev) const {
@@ -161,6 +162,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
return ret;
}
#endif
} // namespace gpu
} // namespace device
} // namespace mindspore

View File

@@ -38,7 +38,9 @@
#include "profiler/device/gpu/gpu_profiling.h"
#include "profiler/device/gpu/gpu_profiling_utils.h"
#include "utils/shape_utils.h"
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#endif
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debug_services.h"
@@ -85,7 +87,9 @@ bool GPUKernelRuntime::Init() {
MS_LOG(ERROR) << "InitDevice error.";
return ret;
}
#ifndef ENABLE_SECURITY
DumpJsonParser::GetInstance().Parse();
#endif
mem_manager_ = std::make_shared<GPUMemoryManager>();
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocDeviceMemory();
@@ -98,7 +102,11 @@ bool GPUKernelRuntime::Init() {
(*init_nccl_comm_funcptr)();
}
device_init_ = true;
#ifdef ENABLE_DEBUGGER
SetDebugger();
#endif
return ret;
}
@@ -120,6 +128,7 @@ std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &out
return real_outputs;
}
#ifdef ENABLE_DEBUGGER
void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs,
const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces,
@ -199,6 +208,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
}
debugger->PostExecuteNode(kernel, last_kernel);
}
#endif
} // namespace
bool GPUKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) {
@@ -723,10 +733,12 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
AllocCommunicationOpDynamicRes(graph);
AllocInplaceNodeMemory(graph);
#ifdef ENABLE_DEBUGGER
bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration();
if (!mock && debugger_) {
debugger_->UpdateStepNum(graph);
}
#endif
auto &kernels = graph->execution_order();
int exec_order = 1;
#ifdef ENABLE_DUMP_IR
@@ -760,11 +772,13 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
AddressPtrList kernel_outputs;
auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock);
if (!ret) {
#ifdef ENABLE_DEBUGGER
if (!mock) {
MS_EXCEPTION_IF_NULL(debugger_);
// invalidate current data collected by the debugger
debugger_->ClearCurrentData();
}
#endif
return false;
}
#ifdef ENABLE_DUMP_IR
@@ -778,18 +792,21 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
if (gpu_kernel != nullptr && dynamic_kernel != nullptr && dynamic_kernel->is_dynamic_shape()) {
gpu_kernel->PostExecute();
}
#ifdef ENABLE_DEBUGGER
// Called once per kernel to collect the kernel's outputs (performs a SyncDeviceToHost)
LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
dump_enabled, kernel == last_kernel);
#endif
}
exec_order = exec_order + 1;
FreeKernelDynamicRes(kernel);
if (!UpdateMemorySwapTask(kernel, mock, profiling)) {
#ifdef ENABLE_DEBUGGER
if (!mock) {
// invalidate current data collected by the debugger
debugger_->ClearCurrentData();
}
#endif
return false;
}
}
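
The error paths in LaunchKernelDynamic need the guard just as much as the happy path: a non-debugger build that fenced only the successful route would fail to compile exactly on the failure branches. A reduced sketch:

// Sketch: failure branches that talk to the debugger carry their own guard.
void ClearDebuggerData();  // hypothetical stand-in for debugger_->ClearCurrentData()

bool LaunchOneKernel(bool alloc_ok, bool mock) {
  if (!alloc_ok) {
#ifdef ENABLE_DEBUGGER
    if (!mock) {
      ClearDebuggerData();  // drop partially collected tensor data
    }
#endif
    return false;
  }
  // ... launch and post-process the kernel ...
  return true;
}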

View File

@@ -190,6 +190,7 @@ void KernelRuntime::RunOpClearMemory(const session::KernelGraph *graph) const {
}
}
#ifdef ENABLE_DEBUGGER
bool KernelRuntime::DumpDataEnabled() {
auto &dump_json_parser = DumpJsonParser::GetInstance();
return dump_json_parser.e2e_dump_enabled();
@@ -207,6 +208,7 @@ bool KernelRuntime::DumpDataEnabledIteration() {
}
return false;
}
#endif
void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) {
AssignStaticMemoryInput(graph);

View File

@@ -59,8 +59,10 @@ class KernelRuntime {
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node = {});
void RunOpClearMemory(const session::KernelGraph *graph) const;
void RunOpMallocPre(const session::KernelGraph &graph, const std::vector<tensor::TensorPtr> &input_tensors);
#ifdef ENABLE_DEBUGGER
static bool DumpDataEnabled();
static bool DumpDataEnabledIteration();
#endif
virtual bool LoadData(session::KernelGraph *graph);
virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
virtual bool Run(session::KernelGraph *graph, bool is_task_sink) = 0;
@@ -92,12 +94,14 @@
void set_device_id(uint32_t device_id) { device_id_ = device_id; }
uint32_t device_id() { return device_id_; }
#ifdef ENABLE_DEBUGGER
// set debugger
void SetDebugger() {
#if !defined(_WIN32) && !defined(_WIN64)
debugger_ = Debugger::GetInstance();
#endif
}
#endif
#ifndef ENABLE_SECURITY
virtual void PreInit() {}
@@ -159,7 +163,7 @@
protected:
uint32_t device_id_{0};
bool pynative_mode_profiling_flag_{false};
#if !defined(_WIN32) && !defined(_WIN64)
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
std::shared_ptr<Debugger> debugger_;
#endif
void *stream_{nullptr};
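
SetDebugger composes a feature guard with a platform guard: the outer #ifdef removes the API from non-debugger builds, while the inner #if leaves the body empty on Windows. A sketch of the same shape:

#include <memory>

class Debugger {
 public:
  static std::shared_ptr<Debugger> GetInstance();  // assumed singleton accessor
};

class Runtime {
 public:
#ifdef ENABLE_DEBUGGER
  void SetDebugger() {
#if !defined(_WIN32) && !defined(_WIN64)
    debugger_ = Debugger::GetInstance();  // no-op body on Windows
#endif
  }
#endif

 protected:
#if defined(ENABLE_DEBUGGER) && !defined(_WIN32) && !defined(_WIN64)
  std::shared_ptr<Debugger> debugger_;
#endif
};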

View File

@@ -21,7 +21,9 @@
#include "runtime/framework/actor/debug_aware_actor.h"
#include "mindrt/include/async/async.h"
#include "utils/log_adapter.h"
#ifndef ENABLE_SECURITY
#include "debug/data_dump/cpu_e2e_dump.h"
#endif
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#include "debug/debugger/debugger_utils.h"
@@ -45,11 +47,13 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
const auto &cnode = node->cast<CNodePtr>();
if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kCPU) {
#ifndef ENABLE_SECURITY
if (DumpJsonParser::GetInstance().GetIterDumpFlag()) {
auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
MS_EXCEPTION_IF_NULL(kernel_graph);
CPUE2eDump::DumpCNodeData(cnode, kernel_graph->graph_id());
}
#endif
} else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
@@ -73,9 +77,11 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const
MS_EXCEPTION_IF_NULL(op_context);
MS_EXCEPTION_IF_NULL(from_aid);
#ifndef ENABLE_SECURITY
if (DumpJsonParser::GetInstance().GetIterDumpFlag()) {
CPUE2eDump::DumpParametersAndConst();
}
#endif
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
@@ -86,7 +92,9 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const
debugger->Debugger::PostExecuteGraphDebugger();
}
#else
#ifndef ENABLE_SECURITY
DumpJsonParser::GetInstance().UpdateDumpIter();
#endif
#endif
// Call back to the 'from' actor to continue processing after debugging has finished.
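
The nested guards at the end of DebugOnStepEnd yield exactly one of three step-end behaviors per build: debugger builds let the debugger do the bookkeeping, plain dump builds advance the dump iteration, and security builds compile in neither. A sketch with illustrative hook names:

// Hypothetical hooks mirroring PostExecuteGraphDebugger / UpdateDumpIter.
void PostExecuteGraphDebugger();
void UpdateDumpIteration();

void OnStepEnd() {
#ifdef ENABLE_DEBUGGER
  PostExecuteGraphDebugger();  // debugger owns step-end bookkeeping
#else
#ifndef ENABLE_SECURITY
  UpdateDumpIteration();       // plain dump build advances the iteration
#endif
  // security build: no step-end bookkeeping is compiled in at all
#endif
}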

View File

@@ -41,7 +41,9 @@
#include "debug/rdr/running_data_recorder.h"
#endif
#include "utils/comm_manager.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif
#include "backend/optimizer/pass/optimize_updatestate.h"
namespace mindspore {
@@ -95,13 +97,14 @@ void GPUDeviceContext::Initialize() {
(*init_nccl_comm_funcptr)();
}
#ifndef ENABLE_SECURITY
// Dump json config file if dump is enabled.
auto rank_id = GetRankID();
auto &json_parser = DumpJsonParser::GetInstance();
json_parser.Parse();
json_parser.CopyJsonToDir(rank_id);
json_parser.CopyMSCfgJsonToDir(rank_id);
#endif
initialized_ = true;
}
@@ -135,11 +138,13 @@ bool GPUDeviceContext::InitDevice() {
void GPUDeviceContext::Destroy() {
// Release GPU buffer manager resource
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
if (debugger && debugger->debugger_enabled()) {
debugger->SetTrainingDone(true);
debugger->SendMetadata(false);
}
#endif
if (GpuBufferMgr::GetInstance().IsInit()) {
if (!GpuBufferMgr::GetInstance().IsClosed() && !GpuBufferMgr::GetInstance().CloseNotify()) {

View File

@@ -39,7 +39,9 @@
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#endif
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif

View File

@@ -16,6 +16,11 @@ This module provides APIs to load and process dump data, i.e. read tensors, check
for watchpoints and other debugging services.
"""
from mindspore._c_expression import security
from . import dbg_services
from . import mi_validator_helpers
from . import mi_validators
if security.enable_security():
raise ModuleNotFoundError("Offline debugger is not supported in security mode."\
"Please recompile mindspore without `-s on`.")

View File

@@ -16,6 +16,7 @@
The module DbgServices provides offline debugger APIs.
"""
from mindspore._c_expression import security
import mindspore._mindspore_offline_debug as cds
from mindspore.offline_debug.mi_validators import check_init, check_initialize, check_add_watchpoint,\
check_remove_watchpoint, check_check_watchpoints, check_read_tensor_info, check_initialize_done, \
@@ -35,6 +36,9 @@ def get_version():
>>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
>>> version = dbg_services.get_version()
"""
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. " \
"Please recompile mindspore without `-s on`.")
return cds.DbgServices(False).GetVersion()
class DbgLogger:
@@ -75,6 +79,9 @@ class DbgServices():
@check_init
def __init__(self, dump_file_path, verbose=False):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. "\
"Please recompile mindspore without `-s on`.")
log.verbose = verbose
log("in Python __init__, file path is ", dump_file_path)
self.dump_file_path = dump_file_path
@@ -382,6 +389,9 @@ class TensorInfo():
@check_tensor_info_init
def __init__(self, node_name, slot, iteration, rank_id, root_graph_id, is_output=True):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. " \
"Please recompile mindspore without `-s on`.")
iteration = replace_minus_one(iteration)
self.instance = cds.tensor_info(node_name, slot, iteration, rank_id, root_graph_id, is_output)
@@ -531,6 +541,9 @@ class TensorData():
@check_tensor_data_init
def __init__(self, data_ptr, data_size, dtype, shape):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode." \
"Please recompile mindspore without `-s on`.")
self.instance = cds.tensor_data(data_ptr, data_size, dtype, shape)
@property
@@ -627,6 +640,9 @@ class TensorBaseData():
"""
@check_tensor_base_data_init
def __init__(self, data_size, dtype, shape):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. " \
"Please recompile mindspore without `-s on`.")
self.instance = cds.TensorBaseData(data_size, dtype, shape)
def __str__(self):
@@ -727,6 +743,9 @@ class TensorStatData():
@check_tensor_stat_data_init
def __init__(self, data_size, dtype, shape, is_bool, max_value, min_value, avg_value, count,
neg_zero_count, pos_zero_count, nan_count, neg_inf_count, pos_inf_count, zero_count):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. " \
"Please recompile mindspore without `-s on`.")
self.instance = cds.TensorStatData(data_size, dtype, shape, is_bool, max_value,
min_value, avg_value, count, neg_zero_count,
pos_zero_count, nan_count, neg_inf_count,
@@ -1064,6 +1083,9 @@ class WatchpointHit():
@check_watchpoint_hit_init
def __init__(self, name, slot, condition, watchpoint_id, parameters, error_code, rank_id, root_graph_id):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. " \
"Please recompile mindspore without `-s on`.")
parameter_list_inst = []
for elem in parameters:
parameter_list_inst.append(elem.instance)
@@ -1285,6 +1307,9 @@ class Parameter():
@check_parameter_init
def __init__(self, name, disabled, value, hit=False, actual_value=0.0):
if security.enable_security():
raise ValueError("Offline debugger is not supported in security mode. " \
"Please recompile mindspore without `-s on`.")
self.instance = cds.parameter(name, disabled, value, hit, actual_value)
@property

View File

@@ -20,6 +20,7 @@ import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "async_sink_mode_true_read_tensors"
@@ -30,6 +31,7 @@ test_name = "async_sink_mode_true_read_tensors"
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_async_sink_mode_true_read_tensors():
debugger_backend = d.DbgServices(
dump_file_path="/home/workspace/mindspore_dataset/dumps/async_sink_true/")

View File

@@ -19,6 +19,7 @@ Watchpoints test script for offline debugger APIs.
import mindspore.offline_debug.dbg_services as d
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "async_sink_mode_true_watchpoints"
@@ -29,6 +30,7 @@ test_name = "async_sink_mode_true_watchpoints"
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_async_sink_mode_true_watchpoints():
if GENERATE_GOLDEN:
f_write = open(test_name + ".expected", "w")

View File

@@ -20,6 +20,7 @@ import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_trans_false_read_tensors"
@@ -30,6 +31,7 @@ test_name = "sync_trans_false_read_tensors"
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_sync_trans_false_read_tensors():
debugger_backend = d.DbgServices(

View File

@@ -19,6 +19,7 @@ Watchpoints test script for offline debugger APIs.
import mindspore.offline_debug.dbg_services as d
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_trans_false_watchpoints"
@@ -29,6 +30,7 @@ test_name = "sync_trans_false_watchpoints"
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_sync_trans_false_watchpoints():
if GENERATE_GOLDEN:

View File

@@ -20,6 +20,7 @@ import mindspore.offline_debug.dbg_services as d
import numpy as np
import pytest
from dump_test_utils import compare_actual_with_expected
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_trans_true_read_tensors"
@@ -30,6 +31,7 @@ test_name = "sync_trans_true_read_tensors"
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@pytest.mark.skip(reason="needs updating")
@security_off_wrap
def test_sync_trans_true_read_tensors():
debugger_backend = d.DbgServices(

View File

@@ -33,6 +33,7 @@ from mindspore.nn import Momentum
from mindspore.nn import TrainOneStepCell
from mindspore.nn import WithLossCell
from dump_test_utils import generate_dump_json
from tests.security_utils import security_off_wrap
class Net(nn.Cell):
@@ -52,6 +53,7 @@ y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32)
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
pwd = os.getcwd()
@@ -104,6 +106,7 @@ def run_e2e_dump():
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_e2e_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
run_e2e_dump()
@@ -113,6 +116,7 @@ def test_e2e_dump():
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_e2e_dump_with_hccl_env():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
os.environ["RANK_TABLE_FILE"] = "invalid_file.json"
@@ -123,6 +127,7 @@ def test_e2e_dump_with_hccl_env():
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_e2e_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
run_e2e_dump()
@@ -131,6 +136,7 @@ def test_cpu_e2e_dump():
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_e2e_dump_with_hccl_set():
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
os.environ["RANK_TABLE_FILE"] = "invalid_file.json"
@@ -141,6 +147,7 @@ def test_cpu_e2e_dump_with_hccl_set():
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_dump():
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
run_e2e_dump()
@@ -149,6 +156,7 @@ def test_gpu_e2e_dump():
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_dump_with_hccl_set():
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
os.environ["RANK_TABLE_FILE"] = "invalid_file.json"
@@ -175,6 +183,7 @@ class ReluReduceMeanDenseRelu(Cell):
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_net_multi_layer_mode1():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
pwd = os.getcwd()
@@ -218,6 +227,7 @@ def test_async_dump_net_multi_layer_mode1():
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_dump_with_diagnostic_path():
"""
Test e2e dump when path is not set (set to empty) in dump json file and MS_DIAGNOSTIC_DATA_PATH is set.
@@ -260,6 +270,7 @@ def run_e2e_dump_execution_graph():
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_dump_with_execution_graph():
"""Test dump with execution graph on GPU."""
context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

View File

@@ -20,11 +20,13 @@ import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors"
@security_off_wrap
def test_sync_trans_false_read_tensors():
# input tensor with zero slot

View File

@@ -19,11 +19,13 @@ import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors_base_stat"
@security_off_wrap
def test_sync_read_tensors_base_stat():
value_tensor = np.array([[7.5, 8.56, -9.78], [10.0, -11.0, 0.0]], np.float32)

View File

@@ -20,11 +20,13 @@ import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_read_tensors_nonexist_node"
@security_off_wrap
def test_sync_trans_read_tensors_nonexist_node():
tensor1 = np.array([32.0, 4096.0], np.float32)

View File

@@ -20,11 +20,13 @@ import shutil
import numpy as np
import mindspore.offline_debug.dbg_services as d
from dump_test_utils import compare_actual_with_expected, build_dump_structure
from tests.security_utils import security_off_wrap
GENERATE_GOLDEN = False
test_name = "sync_watchpoints"
@security_off_wrap
def test_sync_trans_false_watchpoints():
if GENERATE_GOLDEN: