forked from mindspore-Ecosystem/mindspore
sync code self check from dev 1.1
This commit is contained in:
parent
180fd0d9f3
commit
f84b27b444
@@ -294,6 +294,7 @@ void Debugger::PreExecuteGraphDebugger(const std::vector<KernelGraphPtr> &graphs
+    DumpSetup(graph);
   }
 }
 
 void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
   MS_EXCEPTION_IF_NULL(graph_ptr);
   // access lock for public method
@@ -313,23 +314,7 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
   if (graph_proto_list_.size() > 1) {
     // there are more than one graphs are not dataset_graph
     if (not_dataset_graph_sum_ > 0) {
-      // only try to enable debugger if they are not all dataset graphs
-      if (!debugger_enabled_) {
-        EnableDebugger();
-      }
-      if (debugger_enabled_) {
-        // only send compiled graphs once at the initial step.
-        auto dbg_graph_ptr = graph_ptr_;
-        // use current graph ptr to load parameters
-        graph_ptr_ = graph_ptr;
-        LoadParametersAndConst();
-        // revert graph ptr to original value
-        graph_ptr_ = dbg_graph_ptr;
-
-        SendMultiGraphsAndSuspend(graph_proto_list_);
-
-        graph_proto_list_.clear();
-      }
+      SendMultiGraphsAndClear(graph_ptr);
     }
   } else if (graph_proto_list_.size() == 1) {
     // single graph, and not the initial step
@@ -359,6 +344,27 @@ void Debugger::PreExecute(const KernelGraphPtr &graph_ptr) {
   // resets for the new graph
   suspended_at_last_kernel_ = false;
 }
 
+void Debugger::SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr) {
+  // only try to enable debugger if they are not all dataset graphs
+  if (!debugger_enabled_) {
+    EnableDebugger();
+  }
+  if (debugger_enabled_) {
+    // only send compiled graphs once at the initial step.
+    auto dbg_graph_ptr = graph_ptr_;
+    // use current graph ptr to load parameters
+    graph_ptr_ = graph_ptr;
+    LoadParametersAndConst();
+    // revert graph ptr to original value
+    graph_ptr_ = dbg_graph_ptr;
+
+    SendMultiGraphsAndSuspend(graph_proto_list_);
+
+    graph_proto_list_.clear();
+  }
+}
+
 bool Debugger::DumpDataEnabledIteration() const {
   auto &dump_json_parser = DumpJsonParser::GetInstance();
   if (!dump_json_parser.e2e_dump_enabled()) {
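Note: the extracted SendMultiGraphsAndClear helper relies on a save/restore idiom around graph_ptr_: the member is pointed at the caller's graph for the duration of LoadParametersAndConst and then reverted. A minimal, self-contained sketch of that idiom; Graph and DebuggerLike are hypothetical stand-ins for the MindSpore types:

    #include <memory>

    struct Graph {};

    class DebuggerLike {
     public:
      // Temporarily point graph_ptr_ at the caller's graph, do the work
      // that reads graph_ptr_, then restore the original value.
      void SendGraphs(const std::shared_ptr<Graph> &current) {
        auto saved = graph_ptr_;
        graph_ptr_ = current;
        LoadParametersAndConst();
        graph_ptr_ = saved;
      }

     private:
      void LoadParametersAndConst() { /* reads graph_ptr_ internally */ }
      std::shared_ptr<Graph> graph_ptr_;
    };

An RAII guard would also restore the pointer on an early return or exception; the diff keeps the explicit swap, which is fine as long as nothing in between can throw.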
@@ -382,6 +388,7 @@ uint32_t Debugger::GetRankID() {
   uint32_t rank_id = device_context->GetRankID();
   return rank_id;
 }
 
 void Debugger::Dump(const KernelGraphPtr &kernel_graph) const {
   uint32_t rank_id = GetRankID();
   if (debugger_ && debugger_->DebuggerBackendEnabled()) {
@@ -406,6 +413,7 @@ void Debugger::DumpSetup(const KernelGraphPtr &kernel_graph) const {
   E2eDump::DumpSetup(kernel_graph.get(), rank_id);
   MS_LOG(INFO) << "Finish!";
 }
 
 void Debugger::DumpInGraphCompiler(const KernelGraphPtr &kernel_graph) {
   // This function will be called for new GPU runtime using MindRTBackend
   auto &json_parser = DumpJsonParser::GetInstance();
@@ -491,6 +499,7 @@ bool Debugger::ReadNodeDataRequired(const CNodePtr &kernel) const {
   }
   return false;
 }
 
 void Debugger::PostExecuteNode(const CNodePtr &kernel, bool last_kernel) {
   // access lock for public method
   std::lock_guard<std::mutex> a_lock(access_lock_);
@@ -1020,7 +1029,7 @@ std::list<TensorProto> Debugger::LoadTensors(const ProtoVector<TensorProto> &ten
   // add tensor to result list and increment result_index to check next item in ret_name
   tensor_list.push_back(tensor_item);
   if (size_iter > INT_MAX - g_chunk_size) {
     MS_EXCEPTION(ValueError) << size_iter << " + " << g_chunk_size << " would lead to integer overflow!";
   }
   size_iter += g_chunk_size;
 }
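The guard above rejects size_iter + g_chunk_size before the addition can overflow, since signed integer overflow is undefined behavior in C++. A stripped-down sketch of the same check, with std::overflow_error standing in for MS_EXCEPTION and assuming a non-negative chunk size:

    #include <climits>
    #include <stdexcept>
    #include <string>

    // Check the headroom first: offset + chunk overflows int exactly when
    // offset > INT_MAX - chunk (for chunk >= 0), so test that instead.
    void Advance(int *offset, int chunk) {
      if (*offset > INT_MAX - chunk) {
        throw std::overflow_error(std::to_string(*offset) + " + " + std::to_string(chunk) +
                                  " would lead to integer overflow!");
      }
      *offset += chunk;
    }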
@@ -1434,6 +1443,7 @@ void Debugger::UpdateStepNum(const session::KernelGraph *graph) {
     ++num_step_;
   }
 }
 
 void Debugger::UpdateStepNumGPU() {
   // UpdateStepNum with DebugActor::DebugOnStepEnd
   if (device_target_ == kGPUDevice && (debugger_enabled_ || DumpDataEnabledIteration())) {
@@ -1452,6 +1462,7 @@ void Debugger::ClearCurrentData() {
     }
   }
 }
 
 bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
   return debug_services_->TensorExistsInCurrent(tensor_name);
 }
@@ -189,6 +189,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
 
   void SendMultiGraphsAndSuspend(const std::list<GraphProto> &graph_proto_list);
 
+  // send multi_graphs and clear the graph_proto_list_
+  void SendMultiGraphsAndClear(const KernelGraphPtr &graph_ptr);
+
   // wait for command and process command
   // send command request and process reply in a loop
   // break if RunCMD
@@ -35,9 +35,9 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
 namespace mindspore {
 static const size_t PARAMETER_OUTPUT_INDEX = 0;
 
-std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
+std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
   // define a vector containing real output number
-  std::vector<int> real_outputs;
+  std::vector<size_t> real_outputs;
   // P.BatchNorm is used for training and inference
   // can add the filter list for more operators here....
   if (node_name == "BatchNorm") {
@@ -46,8 +46,7 @@ std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &out
   } else {
     // by default, TensorLoader will load all outputs
     for (size_t j = 0; j < output_size; ++j) {
-      size_t index = j;
-      real_outputs.push_back(index);
+      real_outputs.push_back(j);
     }
   }
   return real_outputs;
@@ -86,11 +85,11 @@ void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, ui
   auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
   auto node_name = AnfAlgo::GetCNodeName(cnode);
   std::string kernel_name = GetKernelNodeName(cnode);
-  std::vector<int> real_outputs = CheckRealOutput(node_name, output_size);
+  std::vector<size_t> real_outputs = CheckRealOutput(node_name, output_size);
 
-  for (int j : real_outputs) {
+  for (size_t j : real_outputs) {
     auto addr = kernel_outputs[j];
-    auto type = AnfAlgo::GetOutputInferDataType(cnode, (size_t)j);
+    auto type = AnfAlgo::GetOutputInferDataType(cnode, j);
     // For example, this happens with the Depend op
     if (type == kMetaTypeNone) {
       continue;
@@ -24,7 +24,7 @@ using mindspore::kernel::KernelLaunchInfo;
 
 namespace mindspore {
 
-std::vector<int> CheckRealOutput(const std::string &node_name, const size_t &output_size);
+std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size);
 
 void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_);
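Switching CheckRealOutput and its callers from int to size_t keeps the index type consistent end to end: output_size is already a size_t and the indices subscript containers, so the change removes casts such as (size_t)j at every use site and avoids signed/unsigned comparison warnings. A small sketch of the default branch under that convention; AllIndices is an illustrative name, not a function from the codebase:

    #include <cstddef>
    #include <vector>

    // Build the index list the way the default branch does, keeping
    // everything size_t so no conversion is needed at the subscript sites.
    std::vector<size_t> AllIndices(size_t output_size) {
      std::vector<size_t> indices;
      indices.reserve(output_size);
      for (size_t j = 0; j < output_size; ++j) {
        indices.push_back(j);
      }
      return indices;
    }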
@@ -48,7 +48,6 @@ EventReply GrpcClient::WaitForCommand(const Metadata &metadata) {
   EventReply reply;
   grpc::ClientContext context;
   grpc::Status status = stub_->WaitCMD(&context, metadata, &reply);
 
   if (!status.ok()) {
     MS_LOG(ERROR) << "RPC failed: WaitForCommand";
     MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
@@ -61,7 +60,6 @@ EventReply GrpcClient::SendMetadata(const Metadata &metadata) {
   EventReply reply;
   grpc::ClientContext context;
   grpc::Status status = stub_->SendMetadata(&context, metadata, &reply);
 
   if (!status.ok()) {
     MS_LOG(ERROR) << "RPC failed: SendMetadata";
     MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
@@ -114,7 +112,6 @@ EventReply GrpcClient::SendGraph(const GraphProto &graph) {
   }
   writer->WritesDone();
   grpc::Status status = writer->Finish();
 
   if (!status.ok()) {
     MS_LOG(ERROR) << "RPC failed: SendGraph";
     MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
@@ -136,7 +133,6 @@ EventReply GrpcClient::SendMultiGraphs(const std::list<Chunk> &chunks) {
   }
   writer->WritesDone();
   grpc::Status status = writer->Finish();
 
   if (!status.ok()) {
     MS_LOG(ERROR) << "RPC failed: SendMultigraphs";
     MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
@@ -158,7 +154,6 @@ EventReply GrpcClient::SendTensors(const std::list<TensorProto> &tensors) {
   }
   writer->WritesDone();
   grpc::Status status = writer->Finish();
 
   if (!status.ok()) {
     MS_LOG(ERROR) << "RPC failed: SendTensors";
     MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
@@ -180,7 +175,6 @@ EventReply GrpcClient::SendWatchpointHits(const std::list<WatchpointHit> &watchp
   }
   writer->WritesDone();
   grpc::Status status = writer->Finish();
 
   if (!status.ok()) {
     MS_LOG(ERROR) << "RPC failed: SendWatchpointHits";
     MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
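The five GrpcClient hunks above share one client-streaming shape: write every item, half-close with WritesDone, then Finish to collect the server's status. A generic sketch of that shape; Writer stands for any type with the grpc::ClientWriter-style interface, and the type names are placeholders rather than the real proto types:

    #include <list>

    // Stream all items, half-close, and return the final status. Write()
    // returning false means the stream was closed early by the server.
    template <typename Status, typename Writer, typename Msg>
    Status StreamAll(Writer *writer, const std::list<Msg> &items) {
      for (const auto &item : items) {
        if (!writer->Write(item)) {
          break;
        }
      }
      writer->WritesDone();
      return writer->Finish();
    }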
@@ -18,25 +18,24 @@
 #include <algorithm>
 #include <chrono>
 
-DbgServices::DbgServices(bool verbose) { debug_services_ = new DebugServices(); }
+DbgServices::DbgServices(bool verbose) { debug_services_ = std::make_shared<DebugServices>(); }
 
 DbgServices::DbgServices(const DbgServices &other) {
   MS_LOG(INFO) << "cpp DbgServices object is created via copy";
-  debug_services_ = new DebugServices(*other.debug_services_);
+  debug_services_ = other.debug_services_;
 }
 
 DbgServices &DbgServices::operator=(const DbgServices &other) {
   MS_LOG(INFO) << "cpp DbgServices object is being assigned a different state";
   if (this != &other) {
-    delete debug_services_;
-    debug_services_ = new DebugServices(*other.debug_services_);
+    debug_services_ = other.debug_services_;
   }
   return *this;
 }
 
 DbgServices::~DbgServices() noexcept {
   MS_LOG(INFO) << "cpp DbgServices object is deleted";
-  delete debug_services_;
   debug_services_ = nullptr;
 }
 
 std::string DbgServices::GetVersion() const {
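These hunks replace a raw DebugServices* with std::shared_ptr<DebugServices>: copy construction and assignment now alias the same state instead of deep-copying, and destruction needs no manual delete. A minimal sketch of the resulting ownership, with State as a hypothetical stand-in for DebugServices:

    #include <memory>

    struct State {};

    // With shared_ptr, the compiler-generated copy operations share
    // ownership of one State and the destructor releases it automatically;
    // the delete/new pairs from the raw-pointer version disappear.
    class Services {
     public:
      Services() : state_(std::make_shared<State>()) {}
      Services(const Services &other) = default;             // copies alias state_
      Services &operator=(const Services &other) = default;  // no manual delete

     private:
      std::shared_ptr<State> state_;
    };

Note the new copy semantics are shallow: after a copy, both objects observe mutations made through either one, which matches debug_services_ = other.debug_services_; in the diff.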
@@ -70,25 +69,26 @@ int32_t DbgServices::AddWatchpoint(
   unsigned int id, unsigned int watch_condition,
   std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
   std::vector<parameter_t> parameter_list) {
-  MS_LOG(INFO) << "cpp start";
+  MS_EXCEPTION_IF_NULL(debug_services_);
+  MS_LOG(INFO) << "cpp DbgServices start AddWatchpoint";
 
   MS_LOG(INFO) << "cpp DbgServices AddWatchpoint id " << id;
   MS_LOG(INFO) << "cpp DbgServices AddWatchpoint watch_condition " << watch_condition;
   for (auto const &node : check_nodes) {
-    MS_LOG(INFO) << "cpp DbgServices AddWatchpoint name " << node.first;
+    MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint name " << node.first;
     auto attr_map = node.second;
 
     bool is_output = std::get<bool>(attr_map["is_output"]);
-    MS_LOG(INFO) << "cpp DbgServices AddWatchpoint is_output " << is_output;
+    MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint is_output " << is_output;
 
     std::vector<std::string> rank_id_str = std::get<std::vector<std::string>>(attr_map["rank_id"]);
     std::vector<std::uint32_t> rank_id;
     (void)std::transform(
       rank_id_str.begin(), rank_id_str.end(), std::back_inserter(rank_id),
       [](std::string &id_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(id_str)); });
-    MS_LOG(INFO) << "cpp DbgServices AddWatchpoint rank_id ";
+    MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint rank_id: ";
     for (auto const &i : rank_id) {
-      MS_LOG(INFO) << i << " ";
+      MS_LOG(DEBUG) << i << " ";
     }
 
     // std::vector<uint32_t> root_graph_id = std::get<std::vector<uint32_t>>(attr_map["root_graph_id"]);
@@ -97,9 +97,9 @@ int32_t DbgServices::AddWatchpoint(
     (void)std::transform(
       root_graph_id_str.begin(), root_graph_id_str.end(), std::back_inserter(root_graph_id),
       [](std::string &graph_str) -> std::uint32_t { return static_cast<uint32_t>(std::stoul(graph_str)); });
-    MS_LOG(INFO) << "cpp DbgServices AddWatchpoint root_graph_id";
+    MS_LOG(DEBUG) << "cpp DbgServices AddWatchpoint root_graph_id: ";
     for (auto const &j : root_graph_id) {
-      MS_LOG(INFO) << j << " ";
+      MS_LOG(DEBUG) << j << " ";
     }
   }
 
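Both rank_id and root_graph_id above are converted from strings with the same std::transform / std::back_inserter / std::stoul recipe. A self-contained version of that recipe; ToIds is an illustrative name:

    #include <algorithm>
    #include <cstdint>
    #include <iterator>
    #include <string>
    #include <vector>

    // Append one converted element per input string: std::stoul parses the
    // text and the cast narrows to the uint32_t the backend expects.
    std::vector<std::uint32_t> ToIds(const std::vector<std::string> &strs) {
      std::vector<std::uint32_t> ids;
      (void)std::transform(strs.begin(), strs.end(), std::back_inserter(ids),
                           [](const std::string &s) { return static_cast<std::uint32_t>(std::stoul(s)); });
      return ids;
    }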
@@ -154,17 +154,19 @@ int32_t DbgServices::AddWatchpoint(
 
   debug_services_->AddWatchpoint(id, watch_condition, 0, check_node_list, parameter_list_backend,
                                  &check_node_device_list, &check_node_graph_list);
-  MS_LOG(INFO) << "cpp end";
+  MS_LOG(INFO) << "cpp DbgServices end AddWatchpoint";
   return 0;
 }
 
 int32_t DbgServices::RemoveWatchpoint(unsigned int id) {
+  MS_EXCEPTION_IF_NULL(debug_services_);
   MS_LOG(INFO) << "cpp DbgServices RemoveWatchpoint id " << id;
   debug_services_->RemoveWatchpoint(id);
   return 0;
 }
 
 std::vector<watchpoint_hit_t> DbgServices::CheckWatchpoints(unsigned int iteration) {
+  MS_EXCEPTION_IF_NULL(debug_services_);
   MS_LOG(INFO) << "cpp DbgServices CheckWatchpoint iteration " << iteration;
 
   std::vector<std::string> name;
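The added MS_EXCEPTION_IF_NULL calls validate debug_services_ at each public entry point, so a call on a wrapper whose backend is missing fails loudly instead of dereferencing null. A stripped-down sketch of that guard, with a plain exception and hypothetical names in place of the MindSpore macro and types:

    #include <memory>
    #include <stdexcept>

    struct DebugServicesLike {
      void RemoveWatchpoint(unsigned int /*id*/) {}
    };

    class Wrapper {
     public:
      int RemoveWatchpoint(unsigned int id) {
        // Validate the shared member before every use, as the diff does.
        if (backend_ == nullptr) {
          throw std::runtime_error("backend_ must not be null");
        }
        backend_->RemoveWatchpoint(id);
        return 0;
      }

     private:
      std::shared_ptr<DebugServicesLike> backend_;
    };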
@@ -197,19 +199,19 @@ std::vector<watchpoint_hit_t> DbgServices::CheckWatchpoints(unsigned int iterati
     watchpoint_hit_t hit(name[i], std::stoi(slot[i]), condition[i], watchpoint_id[i], api_parameter_vector,
                          error_codes[i], rank_id[i], root_graph_id[i]);
 
-    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t name " << hit.name;
-    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t slot " << hit.slot;
-    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t watchpoint_id " << hit.watchpoint_id;
-    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t error_code " << hit.error_code;
-    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t rank_id " << hit.rank_id;
-    MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t root_graph_id " << hit.root_graph_id;
+    MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t name " << hit.name;
+    MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t slot " << hit.slot;
+    MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t watchpoint_id " << hit.watchpoint_id;
+    MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t error_code " << hit.error_code;
+    MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t rank_id " << hit.rank_id;
+    MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t root_graph_id " << hit.root_graph_id;
 
     for (auto const &parameter_i : api_parameter_vector) {
-      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter name " << parameter_i.name;
-      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter disabled " << parameter_i.disabled;
-      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter value " << parameter_i.value;
-      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter hit " << parameter_i.hit;
-      MS_LOG(INFO) << "cpp DbgServices watchpoint_hit_t parameter actual_value " << parameter_i.actual_value;
+      MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t parameter name " << parameter_i.name;
+      MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t parameter disabled " << parameter_i.disabled;
+      MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t parameter value " << parameter_i.value;
+      MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t parameter hit " << parameter_i.hit;
+      MS_LOG(DEBUG) << "cpp DbgServices watchpoint_hit_t parameter actual_value " << parameter_i.actual_value;
     }
 
     hits.push_back(hit);
@@ -230,6 +232,7 @@ unsigned int GetTensorSlot(const tensor_info_t info) { return info.slot; }
 bool GetTensorIsOutput(const tensor_info_t info) { return info.is_output; }
 
 std::vector<std::shared_ptr<TensorData>> DbgServices::ReadTensorsUtil(std::vector<tensor_info_t> info) {
+  MS_EXCEPTION_IF_NULL(debug_services_);
   for (auto i : info) {
     MS_LOG(INFO) << "cpp DbgServices ReadTensor info name " << i.node_name << ", slot " << i.slot << ", iteration "
                  << i.iteration << ", rank_id " << i.rank_id << ", root_graph_id " << i.root_graph_id << ", is_output "
@@ -284,6 +287,7 @@ std::vector<tensor_data_t> DbgServices::ReadTensors(const std::vector<tensor_inf
   std::vector<std::shared_ptr<TensorData>> result_list;
   result_list = ReadTensorsUtil(info);
   for (auto result : result_list) {
+    MS_EXCEPTION_IF_NULL(result);
     tensor_data_t tensor_data_item(result->GetDataPtr(), result->GetByteSize(), result->GetType(), result->GetShape());
     tensors_read.push_back(tensor_data_item);
   }
@@ -103,7 +103,7 @@ struct tensor_info_t {
 struct tensor_data_t {
   tensor_data_t(char *data_ptr, uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
       : data_size(data_size), dtype(dtype), shape(shape) {
-    if (data_ptr != NULL) {
+    if (data_ptr != nullptr) {
       this->data_ptr = py::bytes(data_ptr, data_size);
     } else {
       this->data_ptr = py::bytes();
@@ -182,9 +182,6 @@ struct TensorStatData {
 };
 
 class DbgServices {
- private:
-  DebugServices *debug_services_;
-
  public:
   explicit DbgServices(bool verbose = false);
 
@@ -215,6 +212,9 @@ class DbgServices {
   std::vector<TensorStatData> ReadTensorsStat(const std::vector<tensor_info_t> info);
 
   std::string GetVersion() const;
 
+ private:
+  std::shared_ptr<DebugServices> debug_services_ = nullptr;
+
 };
 
 #endif  // DEBUG_DBG_SERVICES_H_
@@ -328,10 +328,10 @@ void TensorSummary<T>::InitCalculators(const std::vector<DebugServices::watchpoi
         range_counts_[wp_id]->set_range_end_inclusive(wp.parameter_list[1].value);
       }
     } else if (wp.tensor_update_ratio_mean_enabled() && prev_tensor_ptr_) {
-      (void)means_.insert({"curr_prev_diff_mean", std::make_unique<MeanCalculator>()});
-      (void)means_.insert({"abs_prev_mean", std::make_unique<MeanCalculator>()});
+      (void)means_.emplace("curr_prev_diff_mean", std::make_unique<MeanCalculator>());
+      (void)means_.emplace("abs_prev_mean", std::make_unique<MeanCalculator>());
     } else if (wp.abs_mean_enabled()) {
-      (void)means_.insert({"abs_current_mean", std::make_unique<MeanCalculator>()});
+      (void)means_.emplace("abs_current_mean", std::make_unique<MeanCalculator>());
     }
   }
 }
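insert({key, value}) first materializes a std::pair and then moves it into the map, while emplace forwards the arguments and constructs the entry in place, which reads more naturally with move-only mapped types such as std::unique_ptr. Both forms leave the map untouched when the key already exists. A compilable sketch with a hypothetical calculator type:

    #include <map>
    #include <memory>
    #include <string>

    struct MeanCalculatorLike {};

    int main() {
      std::map<std::string, std::unique_ptr<MeanCalculatorLike>> means;
      // Same effect as insert({...}), without the intermediate pair.
      (void)means.emplace("abs_current_mean", std::make_unique<MeanCalculatorLike>());
      return 0;
    }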
@@ -150,6 +150,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
     return ret;
   }
   auto tensor_data = std::make_shared<mindspore::TensorData>();
+  MS_EXCEPTION_IF_NULL(tensor_data);
   tensor_data->SetName(tensor_name);
   tensor_data->SetExecutionOrder(execution_order);
   tensor_data->SetSlot(slot);
@@ -57,7 +57,7 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_in
   } else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
 #ifdef ENABLE_DEBUGGER
     auto debugger = Debugger::GetInstance();
-    if (debugger) {
+    if (debugger != nullptr) {
       std::string kernel_name = cnode->fullname_with_scope();
       debugger->SetCurNode(kernel_name);
       bool read_data = CheckReadData(cnode);
@@ -111,7 +111,7 @@ void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const
 
 #ifdef ENABLE_DEBUGGER
   auto debugger = Debugger::GetInstance();
-  if (debugger) {
+  if (debugger != nullptr) {
     debugger->Debugger::UpdateStepNumGPU();
     // Reset exec_order for the next step
     exec_order_ = 0;
@@ -28,7 +28,9 @@ import numpy as np
 
 
 class ConvertToolLoader:
-    """Module to load CANN conversion tool."""
+    """
+    Module to load CANN conversion tool.
+    """
 
     def __init__(self):
        self.utils = None
@@ -44,7 +46,9 @@ class ConvertToolLoader:
 
     @staticmethod
     def find_toolkit_path():
-        """Find the path to Ascend toolkit."""
+        """
+        Find the path to Ascend toolkit.
+        """
         ascend_toolkit_path = os.getenv("ASCEND_TOOLKIT_PATH")
         if not ascend_toolkit_path:
             ascend_toolkit_path = "/usr/local/Ascend"
@@ -63,7 +67,9 @@ class ConvertToolLoader:
         return msaccucmp_file_list[0].parent
 
     def load_convert_tool(self):
-        """load CANN conversion tool from the toolkit path."""
+        """
+        Load CANN conversion tool from the toolkit path.
+        """
         # add toolkit path to system searching module path
         if str(self.toolkit_path) not in sys.path:
             sys.path.insert(0, str(self.toolkit_path))
@@ -99,13 +105,17 @@ class ConvertToolLoader:
         self.compare_exception = self.utils.CompareError
 
     def reset_system_path(self):
-        # restore system searching module path
+        """
+        Restore system searching module path
+        """
         if str(self.toolkit_path) in sys.path:
             sys.path.remove(str(self.toolkit_path))
 
 
 def parse_args(file_list, output_path):
-    """Helper function to parse the input argument for the conversion configuration."""
+    """
+    Helper function to parse the input argument for the conversion configuration.
+    """
     args_dict = dict()
     args_dict['dump_version'] = '2.0'
     args_dict['format'] = 'NCHW'
@@ -122,7 +132,9 @@ def parse_args(file_list, output_path):
 
 
 class AsyncDumpConverter:
-    """Convert the target async dump data into npy files."""
+    """
+    Convert the target async dump data into npy files.
+    """
 
     def __init__(self, file_list, output_path):
         # check input path
@@ -138,12 +150,16 @@ class AsyncDumpConverter:
         self.clear_failed_list_file()
 
     def clear_failed_list_file(self):
-        """Remove existing failed txt file."""
+        """
+        Remove existing failed txt file.
+        """
         if self.failed_file_path and os.path.exists(self.failed_file_path):
             os.remove(self.failed_file_path)
 
     def convert_files(self):
-        """Main entry of the converter to convert async dump files into npy format."""
+        """
+        Main entry of the converter to convert async dump files into npy format.
+        """
         self.convert_tool.log.print_info_log('Start to convert async dump files.')
         try:
             if self.args.format is not None:
@@ -164,7 +180,9 @@
         self.convert_tool.log.print_info_log('Finish to convert async dump files.')
 
     def convert_failed_tensors(self):
-        """Convert the failed tensor recorded in the failed txt file."""
+        """
+        Convert the failed tensor recorded in the failed txt file.
+        """
         self.convert_tool.log.print_info_log(
             'Start to convert failed tensors recorded in ' + self.failed_file_path + '.')
         with open(self.failed_file_path) as failed_lines:
@@ -177,7 +195,9 @@
                 'Failed to convert ' + failed_line + ' to Host format: ' + str(err))
 
     def convert_one_failed_tensor(self, failed_tensor):
-        """Convert failed operator one by one."""
+        """
+        Convert failed operator one by one.
+        """
         if len(failed_tensor) <= 1:
             raise ValueError(
                 "Invalid tensor info in convert_failed_file_list.txt")
@@ -191,11 +211,13 @@
         tensor = getattr(op_data, tensor_type)[index]
         dump_data_array = self.convert_tool.utils.deserialize_dump_data_to_array(tensor)
         array = dump_data_array.reshape(tensor.shape.dim)
-        self._save_tensor_to_npy_file(
-            file_path, tensor_type, index, tensor.format, array)
+        out_path = self._generate_path(file_path, tensor_type, index, tensor.format)
+        self._save_tensor_to_npy_file(out_path, array)
 
     def handle_multi_process(self, convert_obj, files):
-        """Convert async format files to npy in a multithreaded manner."""
+        """
+        Convert async format files to npy in a multithreaded manner.
+        """
         return_code = self.convert_tool.compare_none_error
         # try looking for function in compatibility with the toolkit package version.
         progress = self.convert_tool.progress(len(files))
@@ -223,7 +245,9 @@
         return return_code
 
     def _get_file_list(self, files, convert_obj):
-        """Process to get file lists in multi_process."""
+        """
+        Process to get file lists in multi_process.
+        """
         multi_process_file_list = []
         big_file_list = []
         max_file_size = 0
@@ -241,7 +265,9 @@
         return multi_process_file_list, big_file_list
 
     def _process_big_file(self, big_file_list, convert_obj):
-        """Process big file in multi_process."""
+        """
+        Process big file in multi_process.
+        """
         return_code = self.convert_tool.compare_none_error
         for big_file in big_file_list:
             if hasattr(convert_obj, '_convert_format_for_one_file'):
@@ -256,8 +282,18 @@
                 return_code = ret_bf
         return return_code
 
-    def _save_tensor_to_npy_file(self, file_path, tensor_type, idx, tensor_format, dump_data_array):
-        """Save tensor file into npy format."""
+    @staticmethod
+    def _save_tensor_to_npy_file(out_path, dump_data_array):
+        """
+        Save tensor file into npy format.
+        """
+        np.save(out_path, dump_data_array)
+        os.chmod(out_path, stat.S_IRUSR)
+
+    def _generate_path(self, file_path, tensor_type, idx, tensor_format):
+        """
+        Generate path and filename to the target npy files
+        """
         file_name = os.path.basename(file_path)
         name_splits = file_name.split('.')
         name_splits[1] = name_splits[1].split('_')[-1]
@@ -268,12 +304,12 @@
             idx,
             self.convert_tool.common.get_format_string(tensor_format)
         )
-        out_path = os.path.join(self.output_path, out_file_name)
-        np.save(out_path, dump_data_array)
-        os.chmod(out_path, stat.S_IRUSR)
+        return os.path.join(self.output_path, out_file_name)
 
     def _rename_generated_npy_files(self):
-        """In order to follow dump naming convention, rename npy files generated by CANN conversion tool."""
+        """
+        In order to follow dump naming convention, rename npy files generated by CANN conversion tool.
+        """
         target_file_list = []
         for in_file in self.files_to_convert:
             target_file_list.extend(glob.glob(in_file + "*.npy"))
@@ -31,7 +31,7 @@ def get_version():
     Function to return offline Debug Services version.
 
     Returns:
-        version (str): dbgServices version.
+        version (str): DbgServices version.
 
     Examples:
         >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
@@ -48,7 +48,7 @@ class DbgLogger:
     Offline Debug Services Logger
 
     Args:
-        verbose (bool): whether to print logs.
+        verbose (bool): Whether to print logs.
 
     Examples:
         >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
@@ -70,8 +70,8 @@ class DbgServices:
     Offline Debug Services class.
 
     Args:
-        dump_file_path (str): directory where the dump files are saved.
-        verbose (bool): whether to print logs (default: False)..
+        dump_file_path (str): Directory where the dump files are saved.
+        verbose (bool): Whether to print logs. Default: False.
 
     Examples:
         >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services
@@ -200,7 +200,7 @@ class DbgServices:
         Removing watchpoint from Debug Service instance.
 
         Args:
-            watchpoint_id (int): Watchpoint id
+            watchpoint_id (int): Watchpoint id.
 
         Returns:
             Debug Service instance with removed watchpoint.
@@ -733,17 +733,17 @@ class TensorStatData:
         data_size (int): Size of data in bytes.
         dtype (int): An encoding representing the type of TensorData.
         shape (list): Shape of tensor.
-        is_bool (bool): Whether the data type is bool
-        max_value (float): Maximum value in tensor's elements
-        min_value (float): Minimum value in tensor's elements
-        avg_value (float): Average value of all tensor's elements
-        count (int): Number of elements in tensor
-        neg_zero_count (int): Number of negative elements in tensor
-        pos_zero_count (int): Number of positive elements in tensor
-        nan_cout (int): Number of nan elements in tensor
-        neg_inf_count (int): Number of negative infinity elements in tensor
-        pos_inf_count (int): Number of positive infinity elements in tensor
-        zero_count (int): Total number of zero elements in tensor
+        is_bool (bool): Whether the data type is bool.
+        max_value (float): Maximum value in tensor's elements.
+        min_value (float): Minimum value in tensor's elements.
+        avg_value (float): Average value of all tensor's elements.
+        count (int): Number of elements in tensor.
+        neg_zero_count (int): Number of negative elements in tensor.
+        pos_zero_count (int): Number of positive elements in tensor.
+        nan_cout (int): Number of nan elements in tensor.
+        neg_inf_count (int): Number of negative infinity elements in tensor.
+        pos_inf_count (int): Number of positive infinity elements in tensor.
+        zero_count (int): Total number of zero elements in tensor.
 
     Examples:
         >>> from mindspore.ccsrc.debug.debugger.offline_debug import dbg_services