!23163 Compile isolation for Profiling and Dump

Merge pull request !23163 from caifubi/master-compile-isolation-profiling-dump
This commit is contained in:
i-robot 2021-09-10 07:41:56 +00:00 committed by Gitee
commit ceb37595c3
16 changed files with 99 additions and 7 deletions

View File

@ -88,6 +88,10 @@ if(NOT ENABLE_CPU OR WIN32)
list(REMOVE_ITEM CPU_SRC_LIST "cpu/fl/push_metrics_kernel.cc")
endif()
# When ENABLE_SECURITY is set, drop the profiling kernel module from D_SRC_LIST.
if(ENABLE_SECURITY)
list(REMOVE_ITEM D_SRC_LIST "rts/profiling_kernel_mod.cc")
endif()
if(ENABLE_GPU)
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"gpu/*.cu"

View File

@ -33,9 +33,13 @@ class AscendKernelMod : public KernelMod {
// Returns the value stored in block_dim_.
uint32_t block_dim() { return block_dim_; }
// Returns the value stored in stream_id_.
uint32_t stream_id() { return stream_id_; }
// Reports whether this kernel should be dumped.
// When ENABLE_SECURITY is defined the answer is always false. Otherwise the
// kernel is dumped only if the dump parser selects fullname_, async dump is
// enabled, op debug mode is 0 and the kernel is not a monad kernel.
virtual bool NeedDump() {
#ifdef ENABLE_SECURITY
  return false;
#else
  const auto &parser = DumpJsonParser::GetInstance();
  // Checks run in the same order as a short-circuiting && chain would.
  if (!parser.NeedDump(fullname_)) {
    return false;
  }
  if (!parser.async_dump_enabled()) {
    return false;
  }
  if (parser.op_debug_mode() != 0) {
    return false;
  }
  return !is_monad_;
#endif
}
// Stores the given stream handle in stream_.
void SetStream(void *stream) { stream_ = stream; }
// Returns the stream handle currently held in stream_.
void *GetStream() { return stream_; }

View File

@ -83,7 +83,9 @@
#include "runtime/device/ascend/ascend_bucket.h"
#include "profiler/device/common/memory_profiling.h"
#ifndef ENABLE_SECURITY
using mindspore::device::ascend::ProfilingManager;
#endif
using mindspore::profiler::MemoryProfiling;
namespace mindspore {
@ -544,8 +546,10 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
#endif
// assign stream
AssignStream(NOT_NULL(root_graph));
#ifndef ENABLE_SECURITY
// insert profiling point
device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get()));
#endif
device::KernelAdjust::GetInstance().InsertOverflowCheckOperations(NOT_NULL(root_graph));
// build kernel
BuildKernel(root_graph);
@ -616,8 +620,9 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
#endif
// Assign streams for control sink and hccl and so on
AssignStream(NOT_NULL(graph));
#ifndef ENABLE_SECURITY
device::KernelAdjust::GetInstance().Profiling(NOT_NULL(graph.get()));
#endif
device::KernelAdjust::GetInstance().InsertOverflowCheckOperations(NOT_NULL(graph));
// build kernel if node is cnode
BuildKernel(graph);

View File

@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_callback_register.h"
VMCallbackRegister &VMCallbackRegister::GetInstance() {
@ -28,3 +30,4 @@ bool VMCallbackRegister::Register(Status (*pRegProfCtrlCallback)(MsprofCtrlCallb
}
void VMCallbackRegister::ForceMsprofilerInit() {}
#endif

View File

@ -91,7 +91,9 @@ using mindspore::abstract::AbstractTuple;
using mindspore::abstract::AbstractTuplePtr;
#ifdef ENABLE_D
#ifndef ENABLE_SECURITY
using mindspore::device::ascend::ProfilingManager;
#endif
using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup;
#endif
@ -1275,7 +1277,9 @@ void InitHccl() {
ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice) {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(device_name, device_id);
MS_EXCEPTION_IF_NULL(runtime_instance);
#ifndef ENABLE_SECURITY
runtime_instance->PreInit();
#endif
(void)context::OpenTsd(ms_context);
if (!runtime_instance->Init()) {
MS_LOG(EXCEPTION) << "Runtime init failed.";
@ -1285,10 +1289,12 @@ void InitHccl() {
}
#endif
#if (defined ENABLE_D)
#ifndef ENABLE_SECURITY
if (!ProfilingManager::GetInstance().IsProfiling()) {
ProfilingManager::GetInstance().SetHcclEnabledBefProfilingEnabled();
}
#endif
#endif
}
void FinalizeHccl() {
@ -1355,6 +1361,7 @@ void ReleaseGeTsd() {
}
}
#ifndef ENABLE_SECURITY
void StartUpProfiling() {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
@ -1372,12 +1379,15 @@ void StartUpProfiling() {
runtime_instance->PreInit();
}
}
#endif
void InitPipeline() {
// set python env flag
mindspore::parse::python_adapter::set_python_env_flag(true);
#ifndef ENABLE_SECURITY
// Startup profiling before open tsd
StartUpProfiling();
#endif
// open tsd before ge initialize
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);

View File

@ -85,6 +85,19 @@ if(ENABLE_GPU)
endif()
list(REMOVE_ITEM D_SRC_LIST "ascend/profiling/profiling_callback_register.cc")
# When ENABLE_SECURITY is set, strip all profiling sources from D_SRC_LIST
# with a single REMOVE_ITEM call.
if(ENABLE_SECURITY)
    list(REMOVE_ITEM D_SRC_LIST
        "ascend/profiling/profiling_callback_register.cc"
        "ascend/profiling/profiling_manager.cc"
        "ascend/profiling/profiling_utils.cc"
        "ascend/profiling/reporter/desc_reporter.cc"
        "ascend/profiling/reporter/graph_desc_reporter.cc"
        "ascend/profiling/reporter/op_name_task_stream_reporter.cc"
        "ascend/profiling/reporter/point_reporter.cc"
        "ascend/profiling/reporter/profiling_desc.cc"
        "ascend/profiling/reporter/task_desc_reporter.cc"
    )
endif()
set_property(SOURCE ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
add_library(_mindspore_runtime_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST} ${TDT_SRC_LIST})

View File

@ -26,7 +26,6 @@
#include "utils/ms_context.h"
#include "utils/context/context_extends.h"
#include "utils/mpi/mpi_config.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "common/trans.h"
#include "runtime/rt.h"
#include "runtime/device/ascend/ascend_stream_assign.h"
@ -34,7 +33,10 @@
#include "runtime/device/ascend/tasksink/task_generator.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/kernel_build_client.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "runtime/device/ascend/profiling/profiling_utils.h"
#endif
#include "runtime/device/ascend/ascend_memory_manager.h"
#include "runtime/device/ascend/ascend_event.h"
#include "debug/data_dump/dump_json_parser.h"
@ -65,8 +67,10 @@ using mindspore::dataset::TdtHandle;
#include "backend/session/pynative_task_manager.h"
#ifndef ENABLE_SECURITY
using mindspore::device::ascend::ProfilingManager;
using mindspore::device::ascend::ProfilingUtils;
#endif
using mindspore::device::ascend::tasksink::TaskGenerator;
using mindspore::ge::model_runner::ModelRunner;
using mindspore::kernel::tbe::TbeUtils;
@ -142,6 +146,7 @@ void AscendKernelRuntime::SetCurrentContext() {
void AscendKernelRuntime::ClearGraphModelMap() {
SetCurrentContext();
#ifndef ENABLE_SECURITY
for (auto &iter : graph_data_dumper_) {
MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first;
auto &data_dumper = iter.second;
@ -152,6 +157,7 @@ void AscendKernelRuntime::ClearGraphModelMap() {
graph_data_dumper_.clear();
// Tell users which dump kernel names were not used
DumpJsonParser::GetInstance().PrintUnusedKernel();
#endif
graph_dynamic_kernel_map_.clear();
graph_kernel_events_map_.clear();
@ -164,6 +170,7 @@ void AscendKernelRuntime::ClearGraphModelMap() {
void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
SetCurrentContext();
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " data dumper";
#ifndef ENABLE_SECURITY
if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) {
MS_LOG(DEBUG) << "Unload dump info " << graph_id;
auto &data_dumper = dumper_iter->second;
@ -174,6 +181,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
} else {
MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found";
}
#endif
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " dynamic kernels";
if (auto dynamic_kernel_iter = graph_dynamic_kernel_map_.find(graph_id);
@ -224,6 +232,7 @@ void AsyncDataDumpUninit() {
}
}
#ifndef ENABLE_SECURITY
void AscendKernelRuntime::ReportProfilingData() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
@ -234,6 +243,7 @@ void AscendKernelRuntime::ReportProfilingData() {
reporter.ReportData();
}
}
#endif
void AscendKernelRuntime::ReleaseDeviceRes() {
MS_LOG(INFO) << "Ascend finalize start";
@ -250,7 +260,9 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
return;
}
SetCurrentContext();
#ifndef ENABLE_SECURITY
ReportProfilingData();
#endif
// release ge runtime
ClearGraphModelMap();
@ -272,7 +284,9 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
}
(void)ResetDevice(device_id);
#ifndef ENABLE_SECURITY
(void)ProfilingManager::GetInstance().StopProfiling();
#endif
current_graph_ = nullptr;
if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode &&
!context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
@ -281,6 +295,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
MS_LOG(INFO) << "Ascend finalize end";
}
#ifndef ENABLE_SECURITY
void AscendKernelRuntime::PreInit() {
const auto error_manager_ret = ErrorManager::GetInstance().Init();
if (error_manager_ret != 0) {
@ -295,6 +310,7 @@ void AscendKernelRuntime::PreInit() {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}
}
#endif
uint32_t AscendKernelRuntime::GetRankId() {
uint32_t rank_id;
@ -518,9 +534,11 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first;
ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second);
#ifndef ENABLE_SECURITY
std::function<void *()> model_handle =
std::bind(&ModelRunner::GetModelHandle, &ModelRunner::Instance(), model_iter->first);
DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));
#endif
try {
ModelRunner::Instance().DistributeTask(model_iter->first);
@ -531,18 +549,20 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
MS_LOG(EXCEPTION) << "Distribute Task Failed, error: " << e.what();
}
#ifndef ENABLE_SECURITY
if (ProfilingManager::GetInstance().IsProfiling()) {
auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
ProfilingUtils::ReportProfilingData(task_ids, stream_ids, *graph);
}
LaunchDataDump(graph->graph_id());
#endif
ModelRunner::Instance().LoadModelComplete(model_iter->first);
return true;
}
#ifndef ENABLE_SECURITY
void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
const NotNull<std::function<void *()>> &model_handle) {
if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
@ -557,6 +577,7 @@ void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph
MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
}
}
#endif
void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
if (!DumpJsonParser::GetInstance().async_dump_enabled()) {

View File

@ -28,7 +28,9 @@
#include "runtime/device/ascend/ge_runtime/davinci_model.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "backend/session/session_basic.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/dump/data_dumper.h"
#endif
using std::unordered_map;
using std::vector;
@ -62,7 +64,9 @@ class AscendKernelRuntime : public KernelRuntime {
void SetContext() override;
void CreateContext() override;
const void *context() const override { return rt_context_; }
#ifndef ENABLE_SECURITY
void PreInit() override;
#endif
uint64_t GetAvailableMemMaxSize() const override;
DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kAscend; };
std::shared_ptr<DeviceEvent> CreateDeviceEvent() override;
@ -92,14 +96,16 @@ class AscendKernelRuntime : public KernelRuntime {
void ReleaseDeviceRes() override;
bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
bool CheckGraphIdValid(GraphId graph_id) const;
#ifndef ENABLE_SECURITY
void DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
const NotNull<std::function<void *()>> &model_handle);
void LaunchDataDump(GraphId graph_id);
void ReportProfilingData();
#endif
static CNodePtr GetErrorNodeName(uint32_t streamid, uint32_t taskid);
static std::string GetDumpPath();
static void DumpTaskExceptionInfo(const session::KernelGraph *graph);
static void TaskFailCallback(rtExceptionInfo *task_fail_info);
void ReportProfilingData();
static bool DeleteDumpDir(const std::string &path);
static int DeleteDumpFile(std::string path);
static std::string GetRealPath(const std::string &path);
@ -108,7 +114,9 @@ class AscendKernelRuntime : public KernelRuntime {
bool initialized_{false};
unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_;
unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_;
#ifndef ENABLE_SECURITY
unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_;
#endif
std::map<std::pair<uint32_t, uint32_t>, std::string> stream_id_task_id_op_name_map_;
static std::map<std::string, uint32_t> overflow_tasks_;
static std::vector<rtExceptionInfo> task_fail_infoes_;

View File

@ -18,10 +18,8 @@
#include "runtime/device/ascend/ascend_memory_pool.h"
#include "utils/ms_context.h"
#include "runtime/mem.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "profiler/device/common/memory_profiling.h"
using mindspore::device::ascend::ProfilingManager;
using mindspore::profiler::MemoryProfiling;
namespace mindspore {

View File

@ -19,8 +19,10 @@
#include <runtime/rt.h>
#include "backend/kernel_compiler/task_stream.h"
#include "utils/ms_utils.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_utils.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#endif
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif
@ -260,10 +262,12 @@ bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list,
current_op_index++;
}
#ifndef ENABLE_SECURITY
ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list);
if (ProfilingManager::GetInstance().IsProfiling()) {
ProfilingUtils::SetGraphProfilingCNode(graph_id, profiling_cnode_list);
}
#endif
return true;
}

View File

@ -38,7 +38,9 @@ class CPUDeviceContext;
namespace ascend {
class AscendKernelRuntime;
class AscendMemoryManager;
#ifndef ENABLE_SECURITY
class DataDumper;
#endif
namespace tasksink {
class TaskGenerator;
} // namespace tasksink
@ -122,7 +124,9 @@ class DeviceAddress : public mindspore::DeviceSync {
friend class mindspore::device::gpu::GPUDeviceContext;
friend class mindspore::device::ascend::AscendKernelRuntime;
friend class mindspore::device::ascend::AscendMemoryManager;
#ifndef ENABLE_SECURITY
friend class mindspore::device::ascend::DataDumper;
#endif
friend class mindspore::device::Bucket;
};

View File

@ -42,7 +42,9 @@ size_t kNPUShape = 8;
} // namespace
namespace mindspore {
namespace device {
#ifndef ENABLE_SECURITY
using device::ascend::ProfilingUtils;
#endif
void KernelAdjust::ReorderGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
const std::vector<CNodePtr> &origin_cnode_list = kernel_graph_ptr->execution_order();
@ -804,6 +806,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
MS_LOG(INFO) << "---------------- LoadSwitchInputs End--";
}
#ifndef ENABLE_SECURITY
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) {
if (!ascend::ProfilingManager::GetInstance().IsProfiling()) {
MS_LOG(INFO) << "No need to profiling";
@ -852,6 +855,7 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra
}
kernel_graph_ptr->set_execution_order(new_cnode_list);
}
#endif
CNodePtr KernelAdjust::CreateNPUGetFloatStatus(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
const CNodePtr &npu_alloc_cnode) {

View File

@ -27,11 +27,13 @@
#include "backend/kernel_compiler/kernel_build_info.h"
#include "backend/session/session_context.h"
#include "ir/tensor.h"
#include "runtime/device/ascend/profiling/profiling_utils.h"
#include "runtime/device/kernel_info.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_utils.h"
using mindspore::device::ascend::ProfilingTraceInfo;
using mindspore::device::ascend::ProfilingUtils;
#endif
namespace mindspore {
constexpr auto kCurLoopCountParamName = "cur_loop_count";
constexpr auto kNextLoopCountParamName = "next_loop_count";
@ -58,7 +60,9 @@ class KernelAdjust {
void InsertOverflowCheckOperations(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
void InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
bool StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
#ifndef ENABLE_SECURITY
void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr);
#endif
static bool NeedInsertSwitch();
CNodePtr CreateStreamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
@ -93,8 +97,10 @@ class KernelAdjust {
kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector<std::string> &formats,
const std::vector<TypeId> &type_ids);
void LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs);
#ifndef ENABLE_SECURITY
void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info,
NotNull<session::KernelGraph *> kernel_graph_ptr);
#endif
bool ExistIndependent(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
bool ExistGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);

View File

@ -99,7 +99,9 @@ class KernelRuntime {
#endif
}
#ifndef ENABLE_SECURITY
// Default PreInit does nothing. The hook is only declared when
// ENABLE_SECURITY is not defined.
virtual void PreInit() {}
#endif
virtual uint64_t GetAvailableMemMaxSize() const { return 0; }
virtual void GenKernelEvents(const session::KernelGraph *graph);
virtual std::shared_ptr<DeviceEvent> CreateDeviceEvent() { return nullptr; }

View File

@ -34,7 +34,9 @@ void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return
// No-op stub: returns immediately without touching the graph.
void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
// Stub: always reports success.
bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; }
// Stub: always returns true.
bool KernelAdjust::NeedInsertSwitch() { return true; }
#ifndef ENABLE_SECURITY
// No-op stub; only defined when ENABLE_SECURITY is not defined.
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) { return; }
#endif
// No-op stub: returns immediately without touching the graph.
void KernelAdjust::InsertOverflowCheckOperations(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
return;
}

View File

@ -15,7 +15,9 @@
*/
#include "runtime/device/ascend/tasksink/task_generator.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/dump/data_dumper.h"
#endif
namespace mindspore {
namespace device {
@ -26,11 +28,13 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
return true;
}
} // namespace tasksink
#ifndef ENABLE_SECURITY
// Empty stub definitions of the DataDumper members; compiled only when
// ENABLE_SECURITY is not defined.
void DataDumper::LoadDumpInfo() {}
void DataDumper::UnloadDumpInfo() {}
void DataDumper::OpDebugRegister() {}
void DataDumper::OpDebugUnregister() {}
DataDumper::~DataDumper() {}
#endif
} // namespace ascend
} // namespace device
} // namespace mindspore