!29062 Add comments for dump and debugger code, remove unused functions

Merge pull request !29062 from parastooashtari/debugger_marker
This commit is contained in:
i-robot 2022-01-19 16:22:44 +00:00 committed by Gitee
commit 180b101ad5
19 changed files with 791 additions and 19 deletions

View File

@ -608,6 +608,7 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedLoopSink(); }
// Ascend old runtime.
void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const) {
#ifdef ENABLE_DEBUGGER
@ -625,6 +626,7 @@ void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_g
#endif
}
// Ascend old runtime.
void AscendSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &, VectorRef *const) {
// summary
@ -1659,6 +1661,7 @@ void AscendSession::HardwareOptimize(NotNull<KernelGraphPtr> graph,
}
#ifdef ENABLE_DEBUGGER
// Load graphs and their children for Ascend old runtime.
void AscendSession::LoadGraphsToDbg(NotNull<KernelGraphPtr> graph,
NotNull<std::set<KernelGraphPtr> *> const memo) const {
if (memo->find(graph) != memo->end()) {

View File

@ -126,7 +126,7 @@ void GPUSession::Init(uint32_t device_id) {
}
#ifndef ENABLE_SECURITY
auto &json_parser = DumpJsonParser::GetInstance();
// Dump json config file if dump is enabled
// Dump json config file if dump is enabled for GPU old runtime.
json_parser.CopyDumpJsonToDir(rank_id_);
json_parser.CopyMSCfgJsonToDir(rank_id_);
#endif
@ -413,7 +413,7 @@ GraphId GPUSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
MS_EXCEPTION_IF_NULL(graph);
// Prepare ms context info for dump .pb graph
// Prepare ms context info for dump .pb graph for GPU old runtime.
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
@ -471,6 +471,7 @@ GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
}
#endif
#ifndef ENABLE_SECURITY
// GPU old runtime.
if (json_parser.e2e_dump_enabled()) {
graph->set_root_graph_id(graph->graph_id());
std::string final_graph = "trace_code_graph_" + std::to_string(graph->graph_id());
@ -509,6 +510,7 @@ GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
return graph->graph_id();
}
// GPU old runtime.
void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
#ifdef ENABLE_DEBUGGER
@ -525,6 +527,7 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
#endif
}
// GPU old runtime.
void GPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
// Summary
@ -730,6 +733,7 @@ void GPUSession::DumpSetup(const std::shared_ptr<KernelGraph> &kernel_graph) con
}
void GPUSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
// Dump graph and graph history file if e2e_dump is enabled and update cur_dump_iter for GPU old runtime.
if (debugger_->DebuggerBackendEnabled()) {
MS_EXCEPTION_IF_NULL(kernel_graph);
E2eDump::DumpRunIter(kernel_graph, rank_id_);

View File

@ -91,6 +91,12 @@ bool DumpJsonParser::IsDumpEnabled() {
return true;
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Parse the configuration options in the dump json file pointed to by the environment variable MINDSPORE_DUMP_CONFIG.
*/
void DumpJsonParser::Parse() {
std::lock_guard<std::mutex> guard(lock_);
if (already_parsed_) {
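A minimal standalone sketch of how the MINDSPORE_DUMP_CONFIG-driven configuration described above can be located and read; the main() wrapper and error handling are illustrative assumptions, not MindSpore code:

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

int main() {
  // Dump is driven by a json config file whose path is taken from MINDSPORE_DUMP_CONFIG.
  const char *config_path = std::getenv("MINDSPORE_DUMP_CONFIG");
  if (config_path == nullptr) {
    std::cout << "Dump is not enabled: MINDSPORE_DUMP_CONFIG is not set." << std::endl;
    return 0;
  }
  std::ifstream json_file(config_path);
  if (!json_file.is_open()) {
    std::cerr << "Cannot open dump config file: " << config_path << std::endl;
    return 1;
  }
  // Read the whole file; the real parser extracts the individual dump options from this json content.
  std::string content((std::istreambuf_iterator<char>(json_file)), std::istreambuf_iterator<char>());
  std::cout << "Read " << content.size() << " bytes of dump configuration." << std::endl;
  return 0;
}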
@ -144,6 +150,12 @@ void WriteJsonFile(const std::string &file_path, const std::ifstream &json_file)
ChangeFileMode(file_path, S_IRUSR);
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Copy the dump configuration file to the root directory of the dump path.
*/
void DumpJsonParser::CopyDumpJsonToDir(uint32_t rank_id) {
this->Parse();
if (!IsDumpEnabled()) {
@ -165,6 +177,12 @@ void DumpJsonParser::CopyDumpJsonToDir(uint32_t rank_id) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: Copy the hccl configuration file to the root directory of the dump path.
*/
void DumpJsonParser::CopyHcclJsonToDir(uint32_t rank_id) {
if (!IsDumpEnabled()) {
return;
@ -186,6 +204,13 @@ void DumpJsonParser::CopyHcclJsonToDir(uint32_t rank_id) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Copy the mindspore configuration file to the root directory of the dump path. It provides the device
 * and ms_version information.
*/
void DumpJsonParser::CopyMSCfgJsonToDir(uint32_t rank_id) {
if (!IsDumpEnabled()) {
return;
@ -217,6 +242,12 @@ bool DumpJsonParser::DumpEnabledForIter() const {
return ((e2e_dump_enabled_ || async_dump_enabled_) && IsDumpIter(cur_dump_iter_));
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Dump the data at the given address into an npy file.
*/
bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, size_t len, const ShapeVector &shape,
TypeId type) {
if (filename.empty() || data == nullptr || len == 0) {
@ -595,6 +626,12 @@ void DumpJsonParser::JudgeDumpEnabled() {
JsonConfigToString();
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Check if the given op needs to be dumped based on the configuration options.
*/
bool DumpJsonParser::NeedDump(const std::string &op_full_name) const {
bool need_dump = false;
switch (dump_mode_) {
@ -617,6 +654,12 @@ bool DumpJsonParser::NeedDump(const std::string &op_full_name) const {
return need_dump;
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Increment the dump count for the given kernel.
*/
void DumpJsonParser::MatchKernel(const std::string &kernel_name) {
auto iter = kernels_.find(kernel_name);
if (iter == kernels_.end()) {
@ -637,6 +680,12 @@ void DumpJsonParser::PrintUnusedKernel() {
}
}
/*
* Feature group: Online debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: Generate the directory path where the overflow bin file is located.
*/
std::string DumpJsonParser::GetOpOverflowBinPath(uint32_t graph_id) const {
std::string bin_path;
bin_path.append(path_);
@ -674,6 +723,12 @@ bool DumpJsonParser::OutputNeedDump() const {
return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly;
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: Obtain the cell dump flag of each operator in the given kernel graph.
*/
void DumpJsonParser::GetCellDumpFlag(const session::KernelGraph &kernel_graph) {
if (dump_mode_ != DUMP_KERNELS_WITH_FLAG) {
return;

View File

@ -37,6 +37,14 @@ uint32_t ConvertPhysicalDeviceId(uint32_t device_id) {
return kernel_runtime->device_id();
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Generate the directory path for dump data. It will be in one of these formats:
* 1) tensor/statistic: /dump_path/rank_{rank_id}/{net_name}/{graph_id}/{iter_num}.
* 2) constant data: /dump_path/rank_{rank_id}/{net_name}/{graph_id}/constants/.
*/
std::string GenerateDumpPath(uint32_t graph_id, uint32_t rank_id, bool is_cst) {
auto &dump_json_parser = DumpJsonParser::GetInstance();
std::string net_name = dump_json_parser.net_name();
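A minimal standalone sketch of the directory layout described above; BuildDumpDir and all values below are illustrative placeholders, not the actual GenerateDumpPath implementation:

#include <cstdint>
#include <iostream>
#include <string>

// Builds the tensor/statistic dump directory in the documented layout:
// /dump_path/rank_{rank_id}/{net_name}/{graph_id}/{iter_num}
std::string BuildDumpDir(const std::string &dump_path, uint32_t rank_id, const std::string &net_name,
                         uint32_t graph_id, uint32_t iter_num, bool is_cst) {
  std::string dir = dump_path + "/rank_" + std::to_string(rank_id) + "/" + net_name + "/" +
                    std::to_string(graph_id) + "/";
  // Constant data goes to a fixed "constants" sub-directory instead of an iteration number.
  dir += is_cst ? "constants/" : std::to_string(iter_num);
  return dir;
}

int main() {
  // Hypothetical values; real values come from DumpJsonParser and the runtime.
  std::cout << BuildDumpDir("/tmp/dump", 0, "ResNet50", 1, 3, false) << std::endl;  // /tmp/dump/rank_0/ResNet50/1/3
  std::cout << BuildDumpDir("/tmp/dump", 0, "ResNet50", 1, 3, true) << std::endl;   // /tmp/dump/rank_0/ResNet50/1/constants/
  return 0;
}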
@ -66,6 +74,12 @@ void GetFileKernelName(NotNull<std::string *> kernel_name) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU and CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Get the actual tensor shape for dumping based on the trans_flag option in the configuration json file.
*/
void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull<ShapeVector *> int_shapes, bool trans_flag) {
if (trans_flag) {
*int_shapes = trans::GetRuntimePaddingShape(node, index);
@ -76,6 +90,12 @@ void GetDumpIntShape(const AnfNodePtr &node, size_t index, NotNull<ShapeVector *
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Dump the data in memory to the given file path.
*/
void DumpMemToFile(const std::string &file_path, const device::DeviceAddress &addr, const ShapeVector &int_shapes,
const TypeId &type, bool trans_flag) {
auto format = kOpFormat_DEFAULT;
@ -92,6 +112,12 @@ uint64_t GetTimeStamp() {
return timestamp;
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU, CPU.
* Runtime category: Old runtime, MindRT.
* Description: Remove scope from operator name. The default separator is "--".
*/
std::string GetOpNameWithoutScope(const std::string &fullname_with_scope, const std::string &separator) {
std::size_t found = fullname_with_scope.rfind(separator);
std::string op_name;
@ -101,6 +127,13 @@ std::string GetOpNameWithoutScope(const std::string &fullname_with_scope, const
return op_name;
}
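A small standalone sketch of the separator-based scope stripping described above; StripScope and the example operator name are illustrative assumptions, not the MindSpore implementation:

#include <iostream>
#include <string>

// Returns the substring after the last occurrence of the separator, mirroring the behaviour described above.
std::string StripScope(const std::string &fullname_with_scope, const std::string &separator = "--") {
  std::size_t found = fullname_with_scope.rfind(separator);
  if (found == std::string::npos) {
    return fullname_with_scope;  // no scope present
  }
  return fullname_with_scope.substr(found + separator.size());
}

int main() {
  // Hypothetical fullname_with_scope value.
  std::cout << StripScope("Default--network--Conv2D-op1") << std::endl;  // Conv2D-op1
  return 0;
}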
/*
* Feature group: Dump.
* Target device group: Ascend, GPU, CPU.
* Runtime category: Old runtime, MindRT.
 * Description: Dump string content into the given file path. Its current purpose is to save operator overflow
 * information in a json file in Ascend A+M dump mode.
*/
void DumpToFile(const std::string &file_name, const std::string &dump_str) {
if (dump_str.empty()) {
MS_LOG(ERROR) << "Failed to dump empty tensor data.";

View File

@ -102,6 +102,12 @@ bool E2eDump::IsDeviceTargetGPU() {
return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
}
/*
* Feature group: Dump.
* Target device group: GPU.
* Runtime category: Old runtime, MindRT.
 * Description: This function is for dumping a tensor in memory to disk on a GPU machine.
*/
void E2eDump::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
const device::DeviceAddress &addr, const ShapeVector &int_shapes,
const TypeId &host_type, const TypeId &device_type, bool trans_flag, size_t slot,
@ -397,6 +403,13 @@ void E2eDump::UpdateIterDumpSetup(const session::KernelGraph *graph, bool sink_m
dump_json_parser.UpdateDumpIter();
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: This function is for updating the dump iteration for the GPU and Ascend old runtime and for Ascend
 * super-kernel MindRT.
*/
void E2eDump::DumpSetup(const session::KernelGraph *graph) {
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool sink_mode = (ConfigManager::GetInstance().dataset_mode() || E2eDump::isDatasetGraph(graph));
@ -406,11 +419,25 @@ void E2eDump::DumpSetup(const session::KernelGraph *graph) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: This function is for updating the dump iteration for GPU and kernel-by-kernel Ascend MindRT dump.
*/
void E2eDump::UpdateIterMindRTDump() {
// update dump iter for GPU and kernel by kernel ascend dump.
DumpJsonParser::GetInstance().UpdateDumpIter();
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Generates graph history files (dumping all the iteration numbers in which the graph was executed) for
 * the given graph and rank_id. If dataset_sink_mode is true for async dump in Ascend, this function is called once per
 * epoch and dumps all the iterations in the epoch to the graph history file.
*/
void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
auto &json_parser = DumpJsonParser::GetInstance();
if (!(json_parser.async_dump_enabled() || json_parser.e2e_dump_enabled())) {
@ -454,6 +481,13 @@ void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
ChangeFileMode(file_name, S_IRUSR);
}
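A rough standalone sketch of the append-style graph history file described above (one executed iteration number per line); the file name and directory are hypothetical placeholders, not the paths used by E2eDump::DumpRunIter:

#include <cstdint>
#include <fstream>
#include <string>

// Appends the executed iteration number to a per-graph history file, one number per line.
void AppendRunIter(const std::string &history_dir, uint32_t graph_id, uint32_t iter_num) {
  // Hypothetical file name; the real path is derived from the dump configuration and rank id.
  std::string file_name = history_dir + "/graph_" + std::to_string(graph_id) + "_history.txt";
  std::ofstream fout(file_name, std::ofstream::app);
  if (fout.is_open()) {
    fout << iter_num << "\n";
  }
}

int main() {
  AppendRunIter("/tmp", 1, 0);  // after step 0
  AppendRunIter("/tmp", 1, 1);  // after step 1
  return 0;
}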
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: This function is for dumping the whole graph. It is used for the old runtime in GPU and Ascend and for
 * super-kernel MindRT in Ascend.
*/
void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph);
bool success = false;
@ -491,6 +525,12 @@ void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, cons
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: This function is for dumping a single node. It is used for MindRT in GPU and Ascend kernel-by-kernel.
*/
bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger) {
bool success = false;
auto &dump_json_parser = DumpJsonParser::GetInstance();
@ -529,6 +569,13 @@ bool E2eDump::isDatasetGraph(const session::KernelGraph *graph) {
}
#ifdef ENABLE_D
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: This function is for Ascend A+M dump only. It parses and converts each slot of the tensor in the DumpData
 * object and dumps the tensor data to an npy file or the statistic data to a csv file.
*/
void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
char *data_ptr) {
// dump input tensors
@ -555,6 +602,12 @@ void E2eDump::DumpTensorToFile(const std::string &dump_path, const debugger::dum
}
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: It serves A+M dump. Saves statistics of the tensor data into the dump path as configured.
*/
template <typename T>
bool DumpTensorStatsIfNeeded(const std::string &dump_path, const T &tensor, char *data_ptr, const std::string &io,
uint32_t slot, const ShapeVector &shape, TypeId type) {
@ -591,6 +644,13 @@ bool DumpTensorStatsIfNeeded(const std::string &dump_path, const T &tensor, char
return stat_dump.DumpTensorStatsToFile(dump_path.substr(0, pos), data);
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: It serves A+M dump. Parses each attribute in the DumpData proto object from device format to a MindSpore
 * supported format and saves the tensor data or statistics as configured.
*/
template <typename T>
bool E2eDump::ConvertFormatForTensorAndDump(std::string dump_path, const T &tensor, char *data_ptr,
const std::string &io, uint32_t slot) {
@ -707,6 +767,12 @@ nlohmann::json E2eDump::ParseOverflowInfo(char *data_ptr) {
return overflow_info;
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: This function is for Ascend A+M dump. It parses and dumps op overflow info into a json file.
*/
void E2eDump::DumpOpDebugToFile(const std::string &dump_path, const debugger::dump::DumpData &dump_data,
char *data_ptr) {
std::string out_path = dump_path + ".output.";

View File

@ -71,6 +71,13 @@ DebugServices &DebugServices::operator=(const DebugServices &other) {
return *this;
}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Create a watchpoint_t object, set the watchpoint's variables and add the watchpoint to the
 * watchpoint_table.
*/
void DebugServices::AddWatchpoint(
unsigned int id, unsigned int watch_condition, float parameter,
const std::vector<std::tuple<std::string, bool>> &check_node_list, const std::vector<parameter_t> &parameter_list,
@ -83,9 +90,11 @@ void DebugServices::AddWatchpoint(
watchpoint_item.condition.type = static_cast<CONDITION_TYPE>(watch_condition);
watchpoint_item.condition.parameter = parameter;
watchpoint_item.check_node_list = check_node_list;
// For the offline debugger, check_node_device_list is not nullptr.
if (check_node_device_list != nullptr) {
watchpoint_item.check_node_device_list = *check_node_device_list;
}
// For the offline debugger, check_node_graph_list is not nullptr.
if (check_node_graph_list != nullptr) {
watchpoint_item.check_node_graph_list = *check_node_graph_list;
}
@ -98,6 +107,13 @@ void DebugServices::RemoveWatchpoint(unsigned int id) {
(void)watchpoint_table_.erase(id);
}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Returns a tensor summary unique pointer based on the given tensor_dtype; returns nullptr if the type is
 * not supported.
*/
std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData> &tensor,
const void *const previous_tensor_ptr, uint32_t num_elements,
uint32_t prev_num_elements, int tensor_dtype) {
@ -160,6 +176,12 @@ std::unique_ptr<ITensorSummary> GetSummaryPtr(const std::shared_ptr<TensorData>
}
}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Returns TensorStat for the given tensor based on the base_summary_ptr.
*/
DebugServices::TensorStat DebugServices::GetTensorStatistics(const std::shared_ptr<TensorData> &tensor) {
if (tensor == nullptr) {
MS_LOG(WARNING) << "Tensor is nullptr, returning empty tensor statistics.";
@ -184,7 +206,15 @@ DebugServices::TensorStat DebugServices::GetTensorStatistics(const std::shared_p
return tensor_stat_data;
}
#ifdef OFFLINE_DBG_MODE
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Returns previous_tensor_ptr if the graph history file is found and the current iteration is not the first
 * run iteration for the tensor's graph.
*/
const void *DebugServices::GetPrevTensor(const std::shared_ptr<TensorData> &tensor, bool previous_iter_tensor_needed,
uint32_t *prev_num_elements, bool *history_not_found) {
MS_EXCEPTION_IF_NULL(tensor);
@ -309,6 +339,13 @@ void DebugServices::SetCheckWatchpointsResult(
}
#ifdef OFFLINE_DBG_MODE
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Sets and checks the OUT_OF_MEMORY error_code (for the memory limit feature) and the NO_VALUE error_code
 * (for the new Python API feature). Sets the check watchpoint results.
*/
void DebugServices::CheckOutofMemoryandNoValue(
const bool no_mem_to_read, const bool error_on_no_value, const std::vector<watchpoint_t> watchpoints_to_check,
int chunk_id, partitioned_names *const chunk_names, partitioned_names *const chunk_slots,
@ -339,6 +376,14 @@ void DebugServices::CheckOutofMemoryandNoValue(
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: After checking the watchpoints, set the tensor to not-in-use status (for the memory control
 * feature) by pushing it to the eviction candidate queue, so it can be evicted from memory at any time if the memory is
 * required for checking other nodes. If previous_tensor exists, change their statuses as a pair.
*/
void DebugServices::SetTensorToNotInUse(const std::shared_ptr<TensorData> &tensor, const void *previous_tensor_ptr) {
// set the tensor into not-in-use status in tensor_loader.
auto tensor_name = tensor->GetName();
@ -353,6 +398,16 @@ void DebugServices::SetTensorToNotInUse(const std::shared_ptr<TensorData> &tenso
#endif
#ifdef ONLINE_DBG_MODE
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Compares the current root graph id with the given graph id and returns false if they are not equal
 * for GPU MindRT and Ascend; otherwise, it returns true. The objectives of this function are: 1) Check if the tensor's
 * root_graph_id is different from current_root_graph_id and skip checking watchpoints for the tensor if these values
 * are different. 2) Set prev_tensor_ptr to nullptr if current_root_graph_id is different from prev_root_graph_id.
 * 3) Skip reading the tensor if the tensor's root_graph_id is different from current_root_graph_id.
*/
bool DebugServices::CompareCurrentRootGraph(uint32_t id) {
auto debugger = Debugger::GetInstance();
auto ms_context = MsContext::GetInstance();
@ -368,6 +423,13 @@ bool DebugServices::CompareCurrentRootGraph(uint32_t id) {
return true;
}
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Returns the previous tensor pointer if the current root graph id is equal to the previous root graph id
 * and prev_tensor_data is not nullptr.
*/
const void *DebugServices::PreparePrevTensor(uint32_t *prev_num_elements, const std::string &tensor_name) {
std::shared_ptr<TensorData> prev_tensor_data;
if (!CompareCurrentRootGraph(Debugger::GetInstance()->GetPrevRootGraphId())) {
@ -391,6 +453,15 @@ void DebugServices::CheckHistoryErrorCode(int *error_code, bool history_not_foun
*error_code = ITensorSummary::HISTORY_NOT_FOUND; // error code for history not found
}
}
/*
* Feature group: Offline debugger, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: For all the tensors in the given chunk, reads the tensors, checks all the watchpoints and sets the
 * watchpoint hit result. The check watchpoint process might be affected by the memory limit, by whether the tensor was
 * read successfully, and by whether we have a multi-root-graph scenario. All of these checks are done in this function.
*/
void DebugServices::CheckWatchpointsForTensor(
partitioned_names *const chunk_names, partitioned_names *const chunk_slots,
partitioned_numbers *const chunk_conditions, partitioned_id *const chunk_watchpoint_id,
@ -501,6 +572,14 @@ void DebugServices::CheckWatchpointsForTensor(
}
}
/*
* Feature group: Offline debugger, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: This function checks the watchpoints for the given tensor list by dividing the tensor list into chunks.
 * Each chunk is handled by a separate thread, and the check watchpoint results from all the threads are then gathered
 * and sorted. In the end, the time taken to check the watchpoints in the current step is reported.
*/
void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::vector<std::string> *const slot,
std::vector<int> *const condition, std::vector<unsigned int> *const watchpoint_id,
std::vector<std::vector<parameter_t>> *const parameters,
@ -574,6 +653,13 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
MS_LOG(INFO) << "CheckWatchpoints Took: " << ms_double.count() / 1000 << "s";
}
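A compact standalone sketch of the chunk-per-thread pattern described above, using std::async and std::future as stand-ins; the chunk size and the per-chunk work are illustrative assumptions, not the real watchpoint check:

#include <algorithm>
#include <cstddef>
#include <future>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> tensor_ids(100);
  std::iota(tensor_ids.begin(), tensor_ids.end(), 0);  // stand-in for the tensor list

  const std::size_t chunk_size = 25;
  std::vector<std::future<std::size_t>> futures;
  for (std::size_t begin = 0; begin < tensor_ids.size(); begin += chunk_size) {
    std::size_t end = std::min(begin + chunk_size, tensor_ids.size());
    // Each chunk is handled by its own thread; the real code checks every watchpoint for every tensor in [begin, end).
    futures.push_back(std::async(std::launch::async, [begin, end]() { return end - begin; }));
  }

  std::size_t checked = 0;
  for (auto &f : futures) {
    checked += f.get();  // gather per-chunk results
  }
  std::cout << "Checked " << checked << " tensors in " << futures.size() << " chunks." << std::endl;
  return 0;
}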
/*
* Feature group: Offline debugger, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Sorts the watchpoint hit results for the online and offline debugger. For the online debugger the
 * sorting is based on the execution order, and for the offline debugger it is based on the time stamp.
*/
void DebugServices::SortWatchpointsInfo(
std::vector<std::future<void>> *const tensor_future_vec, std::vector<int> *const exec_order,
std::vector<std::string> *const time_stamps, uint64_t *const tensor_list_byte_size,
@ -632,6 +718,15 @@ void DebugServices::SortWatchpointsInfo(
}
#ifdef OFFLINE_DBG_MODE
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Read tensor info from the given file. If the memory control feature is enabled, it checks
 * whether the tensor can fit in memory before reading. There are two situations that return false: 1) the tensor size
 * is greater than the total preset memory limit; 2) evicting all not-in-use tensors from tensor_list_map_ cannot make
 * enough room for the tensor.
*/
void DebugServices::ReadTensorFromNpy(const std::string &tensor_name, const std::string &file_name,
std::string *const tensor_type, std::size_t *const size,
std::vector<int64_t> *const shape, std::vector<char> **const data_buffer,
@ -712,6 +807,13 @@ void DebugServices::ReadTensorFromNpy(const std::string &tensor_name, const std:
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: This function converts the files in each directory from device format to host format and appends the
 * converted npy file names to the AsyncFilePool. It is for Ascend async dump only.
*/
void DebugServices::ConvertToHostFormat(const DirMap &dir_to_files_map, AsyncFilePool *const result_list) {
std::string file_format = "npy";
for (auto const &d : dir_to_files_map) {
@ -731,7 +833,7 @@ void DebugServices::ConvertToHostFormat(const DirMap &dir_to_files_map, AsyncFil
}
MS_LOG(INFO) << "Number of files to convert: " << files_to_convert_in_dir.size();
if (!files_to_convert_in_dir.empty()) {
// Look for the installation path to the conver_async package. If not found, throw exception and terminate the
// Look for the installation path to the convert_async package. If not found, throw exception and terminate the
// later task.
{
pybind11::gil_scoped_acquire acquire;
@ -748,6 +850,13 @@ void DebugServices::ConvertToHostFormat(const DirMap &dir_to_files_map, AsyncFil
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: This function iterates through the dump directory (dump_key), searches for all the converted npy files
 * and appends them to the AsyncFilePool. It is for Ascend async dump only.
*/
void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &files_after_convert_in_dir,
const std::string &dump_key, AsyncFilePool *const result_list,
const std::string &file_format) {
@ -786,6 +895,14 @@ void DebugServices::ProcessConvertToHostFormat(const std::vector<std::string> &f
(void)closedir(d_handle);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: The node name string is prefixed with the scope and separated with slashes "/", while the npy files in
 * the tensor dump path do not include the scope in their names. The objective of this function is to remove the scope
 * from the node name to match the file.
*/
std::string GetNodeNameWithoutScope(const std::string &dump_style_name) {
if (dump_style_name.empty()) {
return "";
@ -799,6 +916,14 @@ std::string GetNodeNameWithoutScope(const std::string &dump_style_name) {
return dump_style_name.substr(last_scope_marker + delim.size());
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: This function searches for and prepares the target npy file to be read for each node. If the found file
 * is already in npy format, it is pushed to the AsyncFilePool; otherwise, the conversion tool in convert_async.py is
 * used to convert it to npy format beforehand.
*/
void DebugServices::ConvertReadTensors(std::vector<std::string> backend_name, std::vector<size_t> slot,
std::vector<unsigned int> device_id, std::vector<unsigned int> iteration,
std::vector<unsigned int> root_graph_id, AsyncFilePool *const result_list) {
@ -949,6 +1074,13 @@ void DebugServices::GetTensorDataInfoAsync(const std::vector<std::tuple<std::str
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: For the two possible modes (rank and graph), this function returns the rank_id or graph_id extracted
 * from the given directory name; otherwise, it returns UINT32_MAX to identify an invalid rank or graph id.
*/
uint32_t GetRankOrGraphId(const std::string &mode, const std::string &name) {
std::regex re;
if (mode == "rank") {
@ -994,6 +1126,13 @@ std::vector<uint32_t> DebugServices::GetDumpRankIdList() {
return rank_id_list;
}
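A small standalone sketch of extracting a rank or graph id from a directory name, in the spirit of the GetRankOrGraphId description above; the regex patterns and directory names are illustrative assumptions:

#include <cstdint>
#include <iostream>
#include <regex>
#include <string>

// Returns the numeric id in "rank_<id>" (mode "rank") or a bare "<id>" directory (mode "graph");
// returns UINT32_MAX when the name does not match.
uint32_t ExtractRankOrGraphId(const std::string &mode, const std::string &name) {
  std::regex re = (mode == "rank") ? std::regex("^rank_([0-9]+)$") : std::regex("^([0-9]+)$");
  std::smatch match;
  if (std::regex_match(name, match, re)) {
    return static_cast<uint32_t>(std::stoul(match[1].str()));
  }
  return UINT32_MAX;
}

int main() {
  std::cout << ExtractRankOrGraphId("rank", "rank_2") << std::endl;  // 2
  std::cout << ExtractRankOrGraphId("graph", "7") << std::endl;      // 7
  std::cout << ExtractRankOrGraphId("rank", "notes") << std::endl;   // 4294967295 (UINT32_MAX)
  return 0;
}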
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Searches the current dump directory and, for each rank_id in rank_id_list, extracts the existing
 * graph_ids. Then the history file is read for all the extracted graph_ids.
*/
void DebugServices::CheckDumpGraphIdList(std::vector<uint32_t> rank_id_list) {
std::string net_name = GetNetName();
std::string dump_dir = GetDumpDir();
@ -1038,6 +1177,13 @@ void DebugServices::SetGraphsHistory() {
CheckDumpGraphIdList(rank_id_list);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Reads the graph history file (containing iteration numbers in which the graph was executed) and stores
* the data in graphs_run_history_ for the given rank and graph id.
*/
void DebugServices::ReadGraphsHistory(uint32_t rank_id, uint32_t root_graph_id) {
std::tuple<uint32_t, uint32_t> rank_and_graph(rank_id, root_graph_id);
if (graphs_run_history_.find(rank_and_graph) != graphs_run_history_.end()) {
@ -1060,6 +1206,14 @@ void DebugServices::ReadGraphsHistory(uint32_t rank_id, uint32_t root_graph_id)
(void)closedir(d_handle);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Returns a map with a tuple (rank, graph) as the key and a vector as the value. This vector contains
 * tuples with two elements: the first element is the node name and the second element is whether the node is an output
 * or not.
*/
std::map<std::tuple<uint32_t, uint32_t>, std::vector<std::tuple<std::string, bool>>> DebugServices::GetAllWpNodes() {
std::map<std::tuple<uint32_t, uint32_t>, std::vector<std::tuple<std::string, bool>>> rank_and_graph_to_nodes;
for (auto w_table_item : watchpoint_table_) {
@ -1081,6 +1235,13 @@ std::map<std::tuple<uint32_t, uint32_t>, std::vector<std::tuple<std::string, boo
return rank_and_graph_to_nodes;
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: For the given graph and rank id, reads the graph history file, stores all the run iterations for the
 * graph in a vector and inserts it into the graphs_run_history_ map.
*/
void DebugServices::ReadGraphRunIter(std::string file_path, std::tuple<uint32_t, uint32_t> rank_and_graph) {
std::ifstream infile;
std::string line;
@ -1106,6 +1267,13 @@ void DebugServices::ReadGraphRunIter(std::string file_path, std::tuple<uint32_t,
std::pair<std::tuple<uint32_t, uint32_t>, std::vector<uint32_t>>(rank_and_graph, run_iters_vec));
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Creates a tensor_data object, sets its variables based on the function arguments and adds the tensor
 * to tensor_list_map_.
*/
void DebugServices::AddToTensorData(const std::string &backend_name, const std::string &time_stamp,
const std::size_t slot, const unsigned int iteration, const unsigned int device_id,
const unsigned int root_graph_id, const bool is_output, const std::size_t data_size,
@ -1139,6 +1307,13 @@ void DebugServices::AddToTensorData(const std::string &backend_name, const std::
result_list->push_back(tensor_data);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Generate a string in the format {no-scope-op-name}.{input-output}.{slot} to check and match the files
 * to read.
*/
void DebugServices::SetPrefixToCheck(std::string *const prefix_dump_file_name, std::string *const slot_string_to_check,
std::string *const dump_style_kernel_name, size_t slot, bool is_output) {
std::string dump_style_name_part = *dump_style_kernel_name;
@ -1179,6 +1354,13 @@ std::string GetTimeStampStr(std::string file_path) {
return "";
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Search the files in the directory (sync mode) or in the AsyncFilePool (async mode) for the one that
 * matches the filename prefix and read the file into memory.
*/
void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std::vector<size_t> slot,
std::vector<unsigned int> device_id, std::vector<unsigned int> iteration,
std::vector<unsigned int> root_graph_id, const std::vector<bool> &is_output,
@ -1216,7 +1398,6 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
}
MS_LOG(INFO) << "specific_dump_dir " << specific_dump_dir;
// search files in dir for the one that meets the filename prefix and read the file into memory
if (is_sync_mode_ || is_cst) {
ReadDumpedTensorSync(prefix_dump_file_name, specific_dump_dir, backend_name[i], slot[i], device_id[i],
iteration[i], root_graph_id[i], is_output[i], result_list, no_mem_to_read);
@ -1227,7 +1408,14 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
}
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: For both sync and async dump, gets the newest matched file path, reads the npy file and adds the
 * tensor_data object to tensor_list_map_. If there is no matched file, an empty tensor_data object is created with
 * data_size = 0, an empty shape and a nullptr buffer.
*/
void DebugServices::ReadFileAndAddToTensor(const bool found, const std::vector<std::string> &matched_paths,
const std::string &backend_name, const unsigned int device_id,
const unsigned int root_graph_id, const bool &is_output, size_t slot,
@ -1254,6 +1442,13 @@ void DebugServices::ReadFileAndAddToTensor(const bool found, const std::vector<s
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Looks for the files that match the node_name (in the dump directory) for sync dump, reads the newest
 * file and adds the related tensor_data object.
*/
void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_name, const std::string &specific_dump_dir,
const std::string &backend_name, size_t slot, const unsigned int device_id,
unsigned int iteration, unsigned int root_graph_id, const bool &is_output,
@ -1296,6 +1491,13 @@ void DebugServices::ReadDumpedTensorSync(const std::string &prefix_dump_file_nam
no_mem_to_read, iteration, result_list);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: Iterates through all the file paths in the async_file_pool, looks for the files that match the
 * node_name for async dump, reads the newest file and adds the related tensor_data object.
*/
void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir, const std::string &prefix_dump_to_check,
const std::string &slot_string_to_check, const std::string &backend_name,
size_t slot, unsigned int device_id, unsigned int iteration,
@ -1322,6 +1524,15 @@ void DebugServices::ReadDumpedTensorAsync(const std::string &specific_dump_dir,
iteration, result_list);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Obtain the op name, output_str and slot from the npy file name. Make sure its return value is the same
 * as SetPrefixToCheck(). The input/output examples look like:
* input: {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{output_or_input_string}.{slot}.{format}.npy
* output: {op_name}.{output_or_input_string}.{slot}
*/
std::string DebugServices::GetStrippedFilename(const std::string &file_name) {
// strip off the task_id, stream_id, and timestamp, then compare
size_t first_dot = file_name.find(".");
@ -1349,6 +1560,15 @@ std::string DebugServices::GetStrippedFilename(const std::string &file_name) {
return stripped_file_name;
}
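A standalone sketch of the filename mapping shown in the comment above; it assumes the op name itself contains no '.' characters, and the sample file name is invented:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Maps {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{out_in}.{slot}.{format}.npy
// to   {op_name}.{out_in}.{slot}, assuming op_name contains no '.' characters.
std::string StripDumpFileName(const std::string &file_name) {
  std::vector<std::string> parts;
  std::stringstream ss(file_name);
  std::string item;
  while (std::getline(ss, item, '.')) {
    parts.push_back(item);
  }
  const size_t kMinParts = 9;
  if (parts.size() < kMinParts) {
    return file_name;  // unexpected layout, leave unchanged
  }
  size_t n = parts.size();
  // parts[n-1] = "npy", parts[n-2] = format, parts[n-3] = slot, parts[n-4] = input/output string.
  return parts[1] + "." + parts[n - 4] + "." + parts[n - 3];
}

int main() {
  // Hypothetical dump file name for illustration only.
  std::cout << StripDumpFileName("Conv2D.Conv2D-op1.0.0.1629876543.output.0.DefaultFormat.npy") << std::endl;
  // prints: Conv2D-op1.output.0
  return 0;
}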
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Gets a list of the nodes that should be monitored and creates a vector called proto_to_dump with the
 * nodes' original names and dump style names. Then, for each node, it creates an empty tensor_data object with
 * data_byte_size = 0 and data_ptr = nullptr and adds it to the tensor_list (for both sync and async dump). This
 * tensor_list is used by the check watchpoint functions.
*/
std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors(unsigned int iteration,
AsyncFilePool *const async_file_pool,
bool error_on_no_value) {
@ -1405,6 +1625,13 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors(
return tensor_list;
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Iterates through the dump directory and, for each file, looks for a match between the file name and the
 * node names in the proto_to_dump vector.
*/
void DebugServices::ProcessTensorDataSync(const std::vector<std::tuple<std::string, std::string>> &proto_to_dump,
const std::string &specific_dump_dir, unsigned int iteration,
unsigned int device_id, unsigned int root_graph_id,
@ -1463,6 +1690,13 @@ std::string DebugServices::IterationString(unsigned int iteration) {
}
#endif
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Searches for the tensor in the loaded tensors; if the tensor is found and the tensor's root_graph_id is
 * equal to the current root_graph_id, it updates the given vectors.
*/
void DebugServices::ReadNodesTensors(const std::vector<std::string> &name, std::vector<std::string> *const ret_name,
std::vector<const char *> *const data_ptr, std::vector<ssize_t> *const data_size,
std::vector<unsigned int> *const dtype,
@ -1557,6 +1791,14 @@ bool DebugServices::LoadNewTensor(const std::shared_ptr<TensorData> &tensor, boo
return tensor_loader_->LoadNewTensor(tensor, keep_prev);
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Returns the previous iteration in which the tensor's graph was executed. If the current step is the
 * first run iteration for the graph or the graph history file is not available, it returns UINT32_MAX to identify an
 * invalid prev_iteration.
*/
uint32_t DebugServices::GetPrevIteration(const std::shared_ptr<TensorData> &tensor) {
uint32_t prev_iter;
uint32_t rank_id = tensor->GetDeviceId();
@ -1704,6 +1946,13 @@ void DebugServices::AddOpOverflowOpNames(const std::string overflow_bin_path, st
}
}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: Checks whether an operator overflow happened for the given node by checking the overflow
 * directory.
*/
bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int device_id, unsigned int root_graph_id,
unsigned int iteration) {
std::string overflow_bin_path = "";

View File

@ -257,6 +257,12 @@ bool Debugger::CheckDebuggerPartialMemoryEnabled() const {
return false;
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
* Description: Returns true if online debugger or dump is enabled.
*/
bool Debugger::DebuggerBackendEnabled() const { return CheckDebuggerDumpEnabled() || CheckDebuggerEnabled(); }
void Debugger::Reset() {
@ -284,6 +290,13 @@ void Debugger::Reset() {
MS_LOG(INFO) << "Release Debugger resource.";
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
* Description: Sets root_graph_id for all the graphs in the compiled graph list. Sets cur_root_graph_id_ and
* prev_root_graph_id_ and calls PreExecute function for all the graphs.
*/
void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs) {
// MindRTBackend for GPU and Ascend
if (device_target_ == kCPUDevice) {
@ -308,12 +321,25 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
}
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
* Description: When async dump is enabled and dataset_sink_mode is true, graph_iter_num_map_ stores the number of
* iterations per epoch for each running graph.
*/
void Debugger::UpdateGraphIterMap(uint32_t graph_id, int32_t iter_num) {
if (graph_iter_num_map_.find(graph_id) == graph_iter_num_map_.end()) {
graph_iter_num_map_[graph_id] = iter_num;
}
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend.
* Runtime category: Old runtime.
* Description: For Ascend old runtime, this function sets the current and previous root graph id.
*/
void Debugger::SetCurrentAndPrevRootGraph(uint32_t root_graph_id) {
// for GPU and ascend MindRT root graphs are set in PreExecuteGraphDebugger.
if (device_target_ != kAscendDevice || MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
@ -325,8 +351,15 @@ void Debugger::SetCurrentAndPrevRootGraph(uint32_t root_graph_id) {
<< " for step: " << num_step_ << ".";
}
/*
* Feature group: Dump, Online debugger.
* Target device group: GPU.
* Runtime category: Old runtime.
* Description: In the case of GPU old runtime and when we have multiple subgraphs, we use the first run graph id to
* update the step number.
*/
void Debugger::StoreRunGraphIdList(uint32_t graph_id) {
// collect rungrap_ids to update step number in multigraph case
// collect rungraph_ids to update step number in multigraph case for GPU old runtime
if (!rungraph_id_list_.size()) {
rungraph_id_list_.push_back(graph_id);
@ -337,6 +370,13 @@ void Debugger::StoreRunGraphIdList(uint32_t graph_id) {
}
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Sets previous and current root_graph_id for Ascend old runtime, sends graphs to online debugger when
* debugger_enabled_ is true.
*/
void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
MS_EXCEPTION_IF_NULL(graph_ptr);
// access lock for public method
@ -386,6 +426,12 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
suspended_at_last_kernel_ = false;
}
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Sends all the subgraphs to the online debugger when debugger_enabled_ is true.
*/
void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
// only try to enable debugger if they are not all dataset graphs
if (!debugger_enabled_) {
@ -407,6 +453,12 @@ void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Returns true for e2e dump if dump is enabled for the current iteration.
*/
bool Debugger::DumpDataEnabledIteration() const {
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (!dump_json_parser.e2e_dump_enabled()) {
@ -420,6 +472,12 @@ bool Debugger::DumpDataEnabledIteration() const {
return false;
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: Returns the rank_id for GPU and Ascend kernel-by-kernel MindRT.
*/
uint32_t Debugger::GetRankID() {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
@ -431,8 +489,13 @@ uint32_t Debugger::GetRankID() {
return rank_id;
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: Dumps graph history and parameters for GPU and Ascend kernel-by-kernel MindRT. DumpConstantData is
 * called for GPU.
*/
void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
// only for GPU and kernel by kernel ascend (mindRT).
if (!(ascend_kernel_by_kernel_ || device_target_ == kGPUDevice)) {
return;
}
@ -461,6 +524,12 @@ void Debugger::DumpConstantDataAscend(const KernelGraphPtr &graph) {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: Dumps a single node for the given graph_id.
*/
void Debugger::DumpSingleNode(const CNodePtr &node, uint32_t graph_id) {
if (debugger_ && debugger_->DebuggerBackendEnabled()) {
uint32_t rank_id = GetRankID();
@ -468,8 +537,14 @@ void Debugger::DumpSingleNode(const CNodePtr &node, uint32_t graph_id) {
}
}
/*
* Feature group: Dump.
* Target device group: GPU.
* Runtime category: MindRT.
 * Description: This function is used for the new GPU runtime using MindRTBackend; on the Ascend platform, graphs are
 * saved in session_basic.
*/
void Debugger::DumpInGraphCompiler(const KernelGraphPtr &kernel_graph) {
// This function is used for new GPU runtime using MindRTBackend, on Ascend platform, graphs are saved in other way.
if (device_target_ == kAscendDevice) {
return;
}
@ -488,6 +563,12 @@ void Debugger::DumpInGraphCompiler(const KernelGraphPtr &kernel_graph) {
}
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU and CPU.
* Runtime category: MindRT.
 * Description: Load and dump parameters and constant data, call PostExecute and update the dump iteration.
*/
void Debugger::PostExecuteGraphDebugger() {
// On CPU, update the dump iteration. Parameters and consts are not dumped here.
if (device_target_ == kCPUDevice) {
@ -519,6 +600,12 @@ void Debugger::PostExecuteGraphDebugger() {
}
}
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Send hit watchpoints, update the step number and reset loaded tensors.
*/
void Debugger::PostExecute() {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
@ -565,6 +652,13 @@ bool Debugger::ReadNodeDataRequired(const CNodePtr &kernel) const {
return false;
}
/*
* Feature group: Online debugger.
* Target device group: GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Check and send the watchpoint hit for a single node; suspend if a watchpoint is hit or we are
 * continuing at the node level.
*/
void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
@ -597,6 +691,12 @@ void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
}
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Get the graph proto, add it to the graph proto list and add the loaded graph pointers to a list.
*/
void Debugger::LoadGraphs(const KernelGraphPtr &graph_ptr) {
MS_EXCEPTION_IF_NULL(graph_ptr);
if (graph_ptr_ != graph_ptr) {
@ -670,6 +770,12 @@ GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const {
return model.graph();
}
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Send the debugger backend heartbeat to the online debugger every few seconds.
*/
void Debugger::SendHeartbeat(int32_t period) {
int num_heartbeat_fail = 0;
const int max_num_heartbeat_fail = 5;
@ -1407,6 +1513,12 @@ bool Debugger::CheckIp(const std::string &host) const {
uint32_t Debugger::GetFirstRunGraphId() const { return rungraph_id_list_.front(); }
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load a single parameter or value node.
*/
void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index, uint32_t root_graph_id) {
MS_EXCEPTION_IF_NULL(anf_node);
if (!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) {
@ -1450,6 +1562,12 @@ void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load all the parameters and value nodes for the last loaded graph.
*/
void Debugger::LoadParametersAndConst() {
if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
MS_EXCEPTION_IF_NULL(graph_ptr_);
@ -1469,6 +1587,12 @@ void Debugger::LoadParametersAndConst() {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load all the parameters and value nodes for the given graph.
*/
void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) {
if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
MS_EXCEPTION_IF_NULL(graph);
@ -1488,6 +1612,12 @@ void Debugger::LoadParametersAndConst(const KernelGraphPtr &graph) {
}
}
/*
* Feature group: Online debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
* Description: Load all the kernels for the last loaded graph.
*/
void Debugger::LoadGraphOutputs() {
if (!(debugger_enabled() && device_target_ == kAscendDevice)) return;
MS_EXCEPTION_IF_NULL(graph_ptr_);
@ -1528,6 +1658,12 @@ void Debugger::LoadGraphOutputs() {
}
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: MindRT.
 * Description: Load a single node for kernel-by-kernel Ascend MindRT dump.
*/
void Debugger::LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32_t root_graph_id) {
if (device_target_ != kAscendDevice) {
return;
@ -1563,10 +1699,15 @@ void Debugger::LoadNodeOutputs(const CNodePtr &node, uint32_t exec_order, uint32
}
}
/*
* Feature group: Online debugger.
* Target device group: GPU.
* Runtime category: Old runtime.
* Description: Update step number if we are processing the first graph (to support multigraph).
*/
void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(debugger_);
// update step number if we are processing the first graph (to support multigraph)
if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()) &&
(graph->graph_id() == debugger_->GetFirstRunGraphId())) {
// access lock for public method
@ -1575,8 +1716,13 @@ void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
}
}
/*
* Feature group: Online debugger.
* Target device group: GPU.
* Runtime category: MindRT.
* Description: Update step number when DebugActor::DebugOnStepEnd is called at the end of each step.
*/
void Debugger::UpdateStepNumGPU() {
// UpdateStepNum with DebugActor::DebugOnStepEnd
if (device_target_ == kGPUDevice && (debugger_enabled_ || DumpDataEnabledIteration())) {
// access lock for public method
std::lock_guard<std::mutex> a_lock(access_lock_);
@ -1600,6 +1746,13 @@ bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
}
#ifdef ENABLE_D
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: Load the DumpDataBuilder object from dump_data_construct_map_ for tracking the data chunks of
 * node_name. It is for Ascend A+M dump. If not found, create a new one and add it to dump_data_construct_map_.
*/
std::shared_ptr<DumpDataBuilder> Debugger::LoadDumpDataBuilder(const std::string &node_name) {
auto iter = dump_data_construct_map_.find(node_name);
if (iter == dump_data_construct_map_.end()) {

View File

@ -41,8 +41,13 @@ using KernelGraph = mindspore::session::KernelGraph;
using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
namespace mindspore {
/*
* Feature group: Online debugger.
* Target device group: GPU.
* Runtime category: MindRT.
 * Description: Returns a vector containing the real output numbers.
*/
std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
// define a vector containing real output number
std::vector<size_t> real_outputs;
// P.BatchNorm is used for training and inference
// can add the filter list for more operators here....
@ -58,6 +63,12 @@ std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &
return real_outputs;
}
/*
* Feature group: Dump, Online debugger.
* Target device group: GPU.
* Runtime category: MindRT.
* Description: Get kernel inputs from launch_info and load the inputs from device to host.
*/
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
uint32_t root_graph_id) {
// get inputs
@ -86,6 +97,12 @@ void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint
}
}
/*
* Feature group: Dump, Online debugger.
* Target device group: GPU.
* Runtime category: MindRT.
 * Description: Get kernel outputs from launch_info and load the outputs from device to host.
*/
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order,
uint32_t root_graph_id) {
// get outputs
@ -116,6 +133,13 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uin
}
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: Returns true if the node needs to be read for Dump or the online debugger. This function is used by
 * GPU and Ascend kernel-by-kernel MindRT.
*/
bool CheckReadData(const CNodePtr &cnode) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
@ -136,6 +160,13 @@ bool CheckReadData(const CNodePtr &cnode) {
return read_data;
}
/*
* Feature group: Dump, Online debugger.
* Target device group: GPU.
* Runtime category: MindRT.
 * Description: Load the inputs and outputs of the given node if needed and dump them if dump is enabled, then perform
 * the PostExecuteNode function on the given node.
*/
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info, uint32_t exec_order) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
@ -167,6 +198,12 @@ void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info,
debugger->PostExecuteNode(cnode, last_kernel);
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: MindRT.
* Description: Load outputs of the given node and dump them if dump is enabled for Ascend kernel-by-kernel dump.
*/
void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order) {
auto debugger = Debugger::GetInstance();
if (!debugger) {
@ -192,6 +229,13 @@ void ReadDataAndDumpAscend(const CNodePtr &cnode, uint32_t exec_order) {
}
}
/*
* Feature group: Dump, Online Debugger.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
 * Description: Returns the error_info when sink_mode is true and we are in online debugger mode or dump mode for
 * GPU; if everything is normal, the error_info string will be empty.
*/
std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
std::string error_info = "";
bool sink_mode = ConfigManager::GetInstance().dataset_mode() || graph_ptr->IsDatasetGraph();
@ -208,6 +252,12 @@ std::string CheckDatasetSinkMode(const KernelGraphPtr &graph_ptr) {
return error_info;
}
/*
* Feature group: Online Debugger.
* Target device group: Ascend.
* Runtime category: MindRT.
 * Description: Loads the graph's outputs and parameters for Ascend super-kernel mode.
*/
void LoadDataForDebugger(const KernelGraphPtr &graph_ptr) {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
@ -265,6 +315,16 @@ void SuperKernelE2eDump(const KernelGraphPtr &graph) {
}
#ifdef ENABLE_D
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
 * Description: It is a function to be registered to the Adx server for the A+M dump feature with the following steps:
* 1) Merge chunks into one memory segment after receiving all the data for one node.
* 2) Parse dump data object.
* 3) Convert data from device to host format.
* 4) Dump to disk based on configuration.
*/
int32_t DumpDataCallBack(const DumpChunk *dump_chunk, int32_t size) {
MS_LOG(DEBUG) << "ADX DumpDataCallBack is called";
string file_name = dump_chunk->fileName;

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -111,6 +111,13 @@ TensorSummary<T>::TensorSummary(const void *current_tensor_ptr, const void *cons
epsilon_(1.0e-9),
mean_sd_cal_enabled_(false) {}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
 * Description: Initialize the watchpoint calculators based on the watchpoint category. Process all the elements
 * within the current tensor.
*/
template <typename T>
void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoint_t> &wps) {
InitCalculators(wps);
@ -156,6 +163,12 @@ void TensorSummary<T>::SummarizeTensor(const std::vector<DebugServices::watchpoi
}
}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Calculates statistics on chunks of data.
*/
template <typename T>
void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
if (dtype_value == DT_BOOL) {
@ -211,6 +224,12 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
}
}
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Process all the elements of the chunked data and calculate the statistics.
*/
template <typename T>
void TensorSummary<T>::TensorStatisticsSingleThread() {
MeanCalculator mean_calc = MeanCalculator();
@ -244,6 +263,14 @@ void TensorSummary<T>::TensorStatisticsSingleThread() {
avg_ = mean_calc.GetMean();
}
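
A single-pass statistics loop of this kind can be sketched standalone as follows (running mean, min, max and NaN count over a flat buffer); this is a simplified illustration, not the actual MeanCalculator-based implementation.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

struct MiniStats {
  double mean = 0.0;
  double min = std::numeric_limits<double>::infinity();
  double max = -std::numeric_limits<double>::infinity();
  std::size_t nan_count = 0;
};

// Single-threaded pass over all elements, updating a running mean and the extrema.
template <typename T>
MiniStats TensorStatisticsSingleThreadSketch(const std::vector<T> &data) {
  MiniStats stats;
  std::size_t n = 0;
  for (const T &elem : data) {
    double value = static_cast<double>(elem);
    if (std::isnan(value)) {
      ++stats.nan_count;
      continue;
    }
    ++n;
    stats.mean += (value - stats.mean) / static_cast<double>(n);  // incremental mean
    stats.min = std::min(stats.min, value);
    stats.max = std::max(stats.max, value);
  }
  return stats;
}

int main() {
  std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f};
  MiniStats s = TensorStatisticsSingleThreadSketch(data);
  std::cout << "mean=" << s.mean << " min=" << s.min << " max=" << s.max << "\n";
  return 0;
}
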
/*
* Feature group: Online debugger, Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Returns a tuple with three elements: the first is a bool that is true if the watchpoint is hit, the
* second is the error_code set in this function, and the third is the parameter_list for the watchpoint.
*/
template <typename T>
std::tuple<bool, int, std::vector<DebugServices::parameter_t>> TensorSummary<T>::IsWatchpointHit(
DebugServices::watchpoint_t wp) {
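
On the caller side, a tuple shaped like this is convenient to unpack with structured bindings; the stand-in below only mimics the shape of the return value, not the real watchpoint logic.

#include <iostream>
#include <string>
#include <tuple>
#include <vector>

struct Parameter {
  std::string name;
  double value;
};

// Stand-in with the same tuple shape: (hit flag, error code, parameter list).
std::tuple<bool, int, std::vector<Parameter>> IsWatchpointHitSketch(double max_value, double threshold) {
  bool hit = max_value > threshold;
  int error_code = 0;  // would be set on invalid parameters, unsupported comparisons, etc.
  std::vector<Parameter> parameter_list = {{"max", max_value}, {"threshold", threshold}};
  return std::make_tuple(hit, error_code, parameter_list);
}

int main() {
  auto [hit, error_code, parameter_list] = IsWatchpointHitSketch(5.0, 3.0);
  std::cout << "hit=" << hit << " error_code=" << error_code
            << " parameters=" << parameter_list.size() << "\n";
  return 0;
}
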

View File

@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -37,6 +37,13 @@ class DumpDataBuilder {
~DumpDataBuilder() = default;
#ifdef ENABLE_D
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
* Description: This function is for A+M dump only. In each callback, allocate memory and copy the dump chunk from
* Adx. Return false if OOM.
*/
bool CopyDumpChunk(const DumpChunk *dump_chunk) {
try {
uint32_t buf_sz = dump_chunk->bufLen;
@ -50,6 +57,14 @@ class DumpDataBuilder {
return true;
}
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
* Description: This function is for A+M dump only. When receiving the last chunk of the node (is_last_chunk = true),
* parse and construct the dump data for dumping. It performs these steps: 1) merge all chunks for the node; 2) parse
* the header and protobuf string; 3) memcpy the tensor data to a contiguous memory segment.
*/
bool ConstructDumpData(debugger::dump::DumpData *dump_data_proto, std::vector<char> *data_ptr) {
if (chunk_list_.empty()) {
return false;

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -78,6 +78,13 @@ class TensorLoader {
return std::equal(suffix.rbegin(), suffix.rend(), tensor_name.rbegin());
}
/*
* Feature group: Dump, Online debugger and Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load a new tensor into tensor_list_map_ (debugger backend cache). In the offline debugger, add ":prev" to
* the previous tensor's name to avoid a segfault caused by wrongly evicting that tensor when the memory limit is enabled.
*/
bool LoadNewTensor(std::shared_ptr<TensorData> tensor, bool keep_prev) {
lock_.lock();
auto tensor_name = tensor->GetName();
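
The ":prev" bookkeeping can be pictured with a plain map: when keep_prev is set, the existing entry is re-keyed under "<name>:prev" before the new tensor is inserted. This is a simplified sketch without the real TensorLoader locking or TensorData type.

#include <iostream>
#include <map>
#include <memory>
#include <string>

struct MiniTensor {
  std::string name;
  int iteration;
};

std::map<std::string, std::shared_ptr<MiniTensor>> tensor_list_map;

// Insert a new tensor; optionally keep the previous one reachable under "<name>:prev".
bool LoadNewTensorSketch(const std::shared_ptr<MiniTensor> &tensor, bool keep_prev) {
  const std::string &name = tensor->name;
  auto iter = tensor_list_map.find(name);
  if (iter != tensor_list_map.end()) {
    if (keep_prev) {
      tensor_list_map[name + ":prev"] = iter->second;  // protect the previous tensor from eviction
    }
    tensor_list_map.erase(name);
  }
  tensor_list_map[name] = tensor;
  return true;
}

int main() {
  LoadNewTensorSketch(std::make_shared<MiniTensor>(MiniTensor{"Conv2D.output.0", 1}), false);
  LoadNewTensorSketch(std::make_shared<MiniTensor>(MiniTensor{"Conv2D.output.0", 2}), true);
  std::cout << "cached tensors: " << tensor_list_map.size() << "\n";  // 2: current and ":prev"
  return 0;
}
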
@ -124,6 +131,13 @@ class TensorLoader {
return nullptr;
}
/*
* Feature group: Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: Search tensor_list_map_ (debugger backend cache) and obtain the TensorData for a list of tensors. For
* any tensor that is not found, nullptr is returned in the result list.
*/
void SearchTensors(const std::vector<std::string> &search_list,
std::vector<std::tuple<std::string, std::shared_ptr<TensorData>>> *result_list) {
for (auto i : search_list) {
@ -147,6 +161,14 @@ class TensorLoader {
bool EnableMemoryControl() { return mem_total_ > 0; }
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: This function is for the memory control feature only. When the offline debugger finishes using a
* tensor, the tensor is added to cache_evict_queue_ and becomes an eviction candidate. When there is not enough memory
* to read in a new tensor, candidates are evicted from the cache.
*/
void AppendToCacheEvictQueue(const std::string &tensor_name) {
std::lock_guard<std::mutex> lk(mem_lock_);
if (std::find(cache_evict_queue_.begin(), cache_evict_queue_.end(), tensor_name) == cache_evict_queue_.end()) {
@ -155,6 +177,13 @@ class TensorLoader {
}
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: This function is for the memory control feature only. Check whether the tensor size is greater than the
* preset limit. If not, evict candidate tensors in cache_evict_queue_ to make room for it.
*/
bool CheckMemoryAvailable(const std::string &backend_name, const uint64_t data_size) {
// 1. Check if the tensor can fit in the entire limit. If not, don't attempt any read or evictions and generate
// warning.
@ -168,6 +197,13 @@ class TensorLoader {
return ret;
}
/*
* Feature group: Offline debugger.
* Target device group: Ascend, GPU.
* Runtime category: Old runtime, MindRT.
* Description: This function is for the memory control feature only. Greedily evict not-in-use tensors from the cache
* queue. If there is no candidate in the queue, block the thread until one becomes available.
*/
bool CheckAndEvictTensorCache(const uint64_t data_size) {
std::string candidate_name;
uint64_t candidates_size;
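
The blocking-eviction idea can be sketched with a mutex and condition variable: wait until a not-in-use candidate shows up in the queue, then evict greedily until the new tensor fits. The names and sizes below are hypothetical simplifications of the real TensorLoader members.

#include <condition_variable>
#include <cstdint>
#include <deque>
#include <iostream>
#include <map>
#include <mutex>
#include <string>

std::mutex mem_lock;
std::condition_variable evict_cv;
std::deque<std::string> cache_evict_queue;             // not-in-use tensors, eviction candidates
std::map<std::string, uint64_t> cached_tensor_sizes;   // bytes held per cached tensor
uint64_t mem_usage = 0;
const uint64_t mem_total = 1024;

// Block until enough memory is free for data_size, greedily evicting queued candidates.
bool CheckAndEvictTensorCacheSketch(uint64_t data_size) {
  std::unique_lock<std::mutex> lock(mem_lock);
  while (mem_usage + data_size > mem_total) {
    // If no candidate is queued yet, block until another thread appends one and notifies.
    evict_cv.wait(lock, [] { return !cache_evict_queue.empty(); });
    std::string candidate = cache_evict_queue.front();
    cache_evict_queue.pop_front();
    auto it = cached_tensor_sizes.find(candidate);
    if (it == cached_tensor_sizes.end()) {
      continue;  // already evicted elsewhere
    }
    mem_usage -= it->second;
    cached_tensor_sizes.erase(it);
    std::cout << "evicted " << candidate << "\n";
  }
  mem_usage += data_size;
  return true;
}

int main() {
  cached_tensor_sizes = {{"old_tensor_a", 600}, {"old_tensor_b", 400}};
  mem_usage = 1000;
  cache_evict_queue = {"old_tensor_a", "old_tensor_b"};
  CheckAndEvictTensorCacheSketch(400);
  std::cout << "mem_usage=" << mem_usage << "\n";  // 800 after evicting old_tensor_a
  return 0;
}
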
@ -199,6 +235,12 @@ class TensorLoader {
void SetMemTotal(uint64_t total_mem_size) { this->mem_total_ = total_mem_size; }
#ifdef ONLINE_DBG_MODE
/*
* Feature group: Dump.
* Target device group: GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load tensor data from debugger backend cache (tensor_list_map_) and dump to file in npy format.
*/
bool DumpTensorToFile(const std::string &tensor_name, bool trans_flag, const std::string &filepath,
const std::string &host_fmt, const std::vector<int64_t> &host_shape, TypeId host_type,
TypeId device_type, const std::string &addr_format, size_t slot) {

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -597,6 +597,12 @@ AscendDeviceAddress::~AscendDeviceAddress() {
}
#ifndef ENABLE_SECURITY
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
* Description: Dump tensor data to file for e2e dump.
*/
bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, bool trans_flag) const {
bool ret = false;
@ -640,6 +646,12 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std::
#endif
#ifdef ENABLE_DEBUGGER
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend.
* Runtime category: Old runtime, MindRT.
* Description: Load tensor to host and create tensor_data object for the loaded tensor.
*/
bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &,
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
uint32_t root_graph_id) const {

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -139,6 +139,12 @@ void GPUDeviceAddress::ClearDeviceMemory() {
GPUDeviceAddress::~GPUDeviceAddress() { ClearDeviceMemory(); }
/*
* Feature group: Dump, Online debugger.
* Target device group: GPU.
* Runtime category: Old runtime, MindRT.
* Description: Load tensor to host and create tensor_data object for the loaded tensor.
*/
#ifdef ENABLE_DEBUGGER
bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,

View File

@ -133,6 +133,12 @@ std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &out
return real_outputs;
}
/*
* Feature group: Dump, Online debugger.
* Target device group: GPU.
* Runtime category: Old runtime.
* Description: Load data and dump the node if needed.
*/
#ifdef ENABLE_DEBUGGER
void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
const std::vector<mindspore::kernel::AddressPtr> &kernel_inputs,
@ -743,6 +749,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
#ifdef ENABLE_DEBUGGER
bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration();
if (!mock && debugger_) {
// Update the step number for old GPU runtime.
debugger_->UpdateStepNum(graph);
}
#endif

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -355,11 +355,13 @@ void KernelRuntime::RunOpClearMemory(const session::KernelGraph &graph) const {
#ifdef ENABLE_DEBUGGER
bool KernelRuntime::DumpDataEnabled() {
// Returns true if e2e dump is enabled.
auto &dump_json_parser = DumpJsonParser::GetInstance();
return dump_json_parser.e2e_dump_enabled();
}
bool KernelRuntime::DumpDataEnabledIteration() {
// Returns true if e2e dump is enabled and current iteration must be dumped.
auto &dump_json_parser = DumpJsonParser::GetInstance();
if (!dump_json_parser.e2e_dump_enabled()) {
return false;

View File

@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -32,6 +32,13 @@
namespace mindspore {
namespace runtime {
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
* Description: Load and read data for the given node if needed. Dump the node if dump is enabled and free the loaded
* memory after the dump (for GPU and Ascend kernel-by-kernel).
*/
void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_,
const DeviceContext *device_context, OpContext<DeviceTensor> *const op_context,
const AID *from_aid) {
@ -91,6 +98,12 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
ActorDispatcher::Send(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend.
* Runtime category: MindRT.
* Description: Load data for online debugger and dump graph for e2e dump mode (Ascend super kernel mode).
*/
void DebugActor::DebugForGraph(const KernelGraphPtr &graph, const DeviceContext *device_context,
OpContext<DeviceTensor> *const op_context, const AID *from_aid) {
MS_EXCEPTION_IF_NULL(graph);
@ -109,6 +122,12 @@ void DebugActor::DebugForGraph(const KernelGraphPtr &graph, const DeviceContext
ActorDispatcher::Send(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU.
* Runtime category: MindRT.
* Description: Checks dataset_sink_mode, generates the related error if one exists, and calls PreExecuteGraphDebugger.
*/
void DebugActor::DebugOnStepBegin(std::vector<KernelGraphPtr> graphs, std::vector<DeviceContext *> device_contexts,
OpContext<DeviceTensor> *const op_context, const AID *from_aid) {
MS_EXCEPTION_IF_NULL(op_context);
@ -144,6 +163,13 @@ void DebugActor::DebugOnStepBegin(std::vector<KernelGraphPtr> graphs, std::vecto
ActorDispatcher::Send(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
}
/*
* Feature group: Dump, Online debugger.
* Target device group: Ascend, GPU and CPU.
* Runtime category: MindRT.
* Description: Dump parameters and constants and update the dump iteration for CPU. Call PostExecuteGraphDebugger for
* GPU and Ascend, and update the step number of the online debugger for GPU.
*/
void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const AID *from_aid) {
MS_EXCEPTION_IF_NULL(op_context);
MS_EXCEPTION_IF_NULL(from_aid);

View File

@ -1,5 +1,5 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -415,6 +415,7 @@ GraphId GraphCompiler::CompileGraph(const FuncGraphPtr &func_graph, const Device
auto graph_id = CompileGraphImpl(root_graph, device_context);
// Dump all graphs.
// For Ascend mindRT.
session_->DumpGraphs(all_graphs);
// Cache the backend graph output nodes to front nodes with output index.
@ -488,8 +489,10 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
#ifdef ENABLE_DEBUGGER
auto debugger = Debugger::GetInstance();
// Dump graph for GPU mindRT if dump is enabled.
debugger->DumpInGraphCompiler(graph);
if (debugger && debugger->DebuggerBackendEnabled()) {
// Load graphs for GPU and Ascend mindRT.
debugger->LoadGraphs(graph);
}
#endif

View File

@ -294,6 +294,8 @@ void GraphScheduler::BuildAndScheduleGlobalActor() {
(void)actor_manager->Spawn(base_recorder_actor, true);
// Create and schedule debug actor.
// debugger_actor_need is true for CPU when e2e dump is enabled; for Ascend and GPU it is true when the debugger or
// dump is enabled.
#ifndef ENABLE_SECURITY
bool debugger_actor_need = DumpJsonParser::GetInstance().e2e_dump_enabled();
#endif

View File

@ -203,6 +203,13 @@ void InitMemReuseExecOrder(KernelGraph *kernel_graph) {
UnfoldRecursiveExecOrder(kernel_graph);
}
} // namespace
/*
* Feature group: Dump.
* Target device group: Ascend.
* Runtime category: MindRT.
* Description: Parse the dump json config file and register the callback to Adx.
*/
#ifndef ENABLE_SECURITY
void DumpInit(uint32_t device_id) {
auto &json_parser = DumpJsonParser::GetInstance();