forked from mindspore-Ecosystem/mindspore
!17987 fix repeated release device resource of actor runtime
Merge pull request !17987 from limingqi107/actor_runtime
commit 4932854776
@@ -301,30 +301,38 @@ std::vector<KernelWithIndex> AnfRuntimeAlgorithm::GetAllOutputWithIndex(const An
   std::vector<KernelWithIndex> ret;
+  std::vector<KernelWithIndex> ret_empty;
 
-  // The MakeTuple node need expand and recurse.
+  // The makeTuple node need expand and recurse.
   if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimMakeTuple)) {
     auto make_tuple = node->cast<CNodePtr>();
     MS_EXCEPTION_IF_NULL(make_tuple);
     for (size_t i = 1; i < make_tuple->inputs().size(); i++) {
-      auto input_i_vector = GetAllOutputWithIndex(make_tuple->input(i));
-      (void)std::copy(input_i_vector.begin(), input_i_vector.end(), std::back_inserter(ret));
+      auto make_tuple_output = GetAllOutputWithIndex(make_tuple->input(i));
+      (void)std::copy(make_tuple_output.begin(), make_tuple_output.end(), std::back_inserter(ret));
     }
     return ret;
   }
 
-  auto outputs_num = AnfAlgo::GetOutputTensorNum(node);
-  if (!IsRealCNodeKernel(node)) {
-    outputs_num = 1;
-  }
+  // The depend node need get the real node.
+  if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimDepend)) {
+    auto depend_node = node->cast<CNodePtr>();
+    MS_EXCEPTION_IF_NULL(depend_node);
+    auto real_output = GetAllOutputWithIndex(depend_node->input(kRealInputIndexInDepend));
+    (void)std::copy(real_output.begin(), real_output.end(), std::back_inserter(ret));
+    return ret;
+  }
+
+  const std::vector<PrimitivePtr> return_types = {prim::kPrimDepend, prim::kPrimMakeTuple};
   // The output may be the tuple, so need visit all the outputs of node.
+  auto outputs_num = AnfAlgo::GetOutputTensorNum(node);
   for (size_t i = 0; i < outputs_num; ++i) {
-    const auto &output_with_index = AnfAlgo::VisitKernelWithReturnType(node, i, false);
+    const auto &output_with_index = AnfAlgo::VisitKernelWithReturnType(node, i, false, return_types);
     MS_EXCEPTION_IF_NULL(output_with_index.first);
 
-    // The MakeTuple node need recurse.
-    if (AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimMakeTuple)) {
-      auto input_vector = GetAllOutputWithIndex(output_with_index.first);
-      (void)std::copy(input_vector.begin(), input_vector.end(), std::back_inserter(ret));
+    // The depend and makeTuple node need recurse.
+    if (AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimDepend) ||
+        AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimMakeTuple)) {
+      auto output_vector = GetAllOutputWithIndex(output_with_index.first);
+      (void)std::copy(output_vector.begin(), output_vector.end(), std::back_inserter(ret));
       continue;
     }
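The hunk above rewrites AnfRuntimeAlgorithm::GetAllOutputWithIndex so that both kPrimMakeTuple and kPrimDepend nodes are expanded recursively into a flat list of real output nodes, with VisitKernelWithReturnType told to stop at those node types via return_types. Below is a minimal, self-contained sketch of that flattening pattern; the Node type, its kind tag, and the string labels are hypothetical stand-ins for MindSpore's AnfNodePtr/CNodePtr machinery, not the project's actual API (note also that, unlike a real CNode, this toy keeps no primitive at inputs[0], so every input is iterated).

// Toy version of the recursive output-flattening pattern:
// a "make_tuple" node expands into all of its inputs, a "depend"
// node forwards to its real input, and every other node contributes
// its own outputs as (node, output_index) pairs.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Node;
using NodePtr = std::shared_ptr<Node>;
using NodeWithIndex = std::pair<NodePtr, size_t>;

struct Node {
  std::string kind;             // "make_tuple", "depend", or "kernel"
  std::vector<NodePtr> inputs;  // for "depend", inputs[0] is the real input
  size_t outputs_num = 1;
};

std::vector<NodeWithIndex> GetAllOutputWithIndex(const NodePtr &node) {
  std::vector<NodeWithIndex> ret;
  if (node->kind == "make_tuple") {
    // Expand every tuple element, recursing into nested tuples.
    for (const auto &input : node->inputs) {
      auto expanded = GetAllOutputWithIndex(input);
      (void)std::copy(expanded.begin(), expanded.end(), std::back_inserter(ret));
    }
    return ret;
  }
  if (node->kind == "depend") {
    // Depend only orders execution; the value comes from the real input.
    return GetAllOutputWithIndex(node->inputs.front());
  }
  // A real kernel: emit one entry per output.
  for (size_t i = 0; i < node->outputs_num; ++i) {
    ret.emplace_back(node, i);
  }
  return ret;
}

int main() {
  auto a = std::make_shared<Node>(Node{"kernel", {}, 2});
  auto b = std::make_shared<Node>(Node{"kernel", {}, 1});
  auto dep = std::make_shared<Node>(Node{"depend", {b, a}, 1});
  auto tuple = std::make_shared<Node>(Node{"make_tuple", {a, dep}, 1});
  // Prints a's two outputs, then b's single output (reached through depend).
  for (const auto &[n, i] : GetAllOutputWithIndex(tuple)) {
    std::cout << n->kind << " output " << i << "\n";
  }
  return 0;
}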
@@ -18,7 +18,6 @@
 #include <vector>
 #include <memory>
 #include "runtime/device/gpu/gpu_device_manager.h"
-#include "runtime/device/kernel_runtime_manager.h"
 #include "utils/log_adapter.h"
 #include "utils/ms_context.h"
 #include "runtime/device/gpu/gpu_memory_allocator.h"
@@ -86,15 +85,15 @@ bool GPUDeviceAddress::SyncHostToDevice(const ShapeVector &, size_t size, TypeId
     return SyncHostToDevice(size, host_ptr);
   }
 
   // PyNative mode need copy async to improve performance.
   MS_EXCEPTION_IF_NULL(host_ptr);
   bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
   if (!need_sync) {
     return true;
   }
-  auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
-  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kGPUDevice, device_id);
-  MS_EXCEPTION_IF_NULL(runtime_instance);
-  return runtime_instance->MemcpyAsync(ptr_, host_ptr, size, 0);
+  auto &stream = GPUDeviceManager::GetInstance().default_stream();
+  MS_EXCEPTION_IF_NULL(stream);
+  return GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(ptr_, host_ptr, size, stream);
 }
 
 void GPUDeviceAddress::ClearDeviceMemory() {
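The change above drops the per-device KernelRuntimeManager lookup and its MemcpyAsync call in favor of copying through GPUDeviceManager's default stream, which is why the kernel_runtime_manager.h include disappears in the earlier hunk. For readers unfamiliar with the underlying mechanism, here is a standalone sketch of the same asynchronous host-to-device copy pattern written directly against the CUDA runtime API; this is not MindSpore code, and the CHECK_CUDA macro is a local helper defined only for this example.

// Async host-to-device copy on an explicit CUDA stream.
#include <cstdio>
#include <cuda_runtime.h>

#define CHECK_CUDA(expr)                                         \
  do {                                                           \
    cudaError_t err = (expr);                                    \
    if (err != cudaSuccess) {                                    \
      std::fprintf(stderr, "%s failed: %s\n", #expr,             \
                   cudaGetErrorString(err));                     \
      return 1;                                                  \
    }                                                            \
  } while (0)

int main() {
  const size_t size = 1 << 20;  // 1 MiB
  void *host_ptr = nullptr;
  void *device_ptr = nullptr;
  cudaStream_t stream = nullptr;

  // Pinned host memory is required for the copy to be truly asynchronous.
  CHECK_CUDA(cudaMallocHost(&host_ptr, size));
  CHECK_CUDA(cudaMalloc(&device_ptr, size));
  CHECK_CUDA(cudaStreamCreate(&stream));

  // Enqueue the copy on the stream; other work may overlap with it.
  CHECK_CUDA(cudaMemcpyAsync(device_ptr, host_ptr, size,
                             cudaMemcpyHostToDevice, stream));
  // Block until everything queued on the stream has finished.
  CHECK_CUDA(cudaStreamSynchronize(stream));

  CHECK_CUDA(cudaStreamDestroy(stream));
  CHECK_CUDA(cudaFree(device_ptr));
  CHECK_CUDA(cudaFreeHost(host_ptr));
  return 0;
}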
@@ -43,6 +43,11 @@ void GPUDeviceManager::InitDevice() {
 }
 
 void GPUDeviceManager::ReleaseDevice() {
+  // Avoid repeated release device resource.
+  if (!dev_alive_) {
+    return;
+  }
+
   for (CudaDeviceStream stream : gpu_streams_) {
     if (stream != nullptr) {
       CHECK_OP_RET_WITH_ERROR(CudaDriver::DestroyStream(stream), "Failed to destroy CUDA stream.");
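This hunk is the core of the fix named in the commit title: ReleaseDevice() is made idempotent by bailing out once the device is no longer alive, so a second release (for example, one triggered by actor runtime teardown and one by the device manager itself) cannot destroy the same CUDA streams twice. A minimal sketch of the guard pattern follows; DeviceManager, stream_ids_, and the printf stand-in for CudaDriver::DestroyStream are illustrative only, and it assumes dev_alive_ is cleared at the end of the real release path.

// Idempotent-release guard: the second call is a no-op.
#include <cstdio>
#include <vector>

class DeviceManager {
 public:
  void ReleaseDevice() {
    // Avoid repeated release of device resources.
    if (!dev_alive_) {
      return;
    }
    for (int stream_id : stream_ids_) {
      // Stand-in for CudaDriver::DestroyStream(stream).
      std::printf("destroying stream %d\n", stream_id);
    }
    stream_ids_.clear();
    dev_alive_ = false;  // later calls short-circuit at the guard
  }

 private:
  bool dev_alive_ = true;
  std::vector<int> stream_ids_ = {0, 1};
};

int main() {
  DeviceManager mgr;
  mgr.ReleaseDevice();  // frees resources once
  mgr.ReleaseDevice();  // safe: guard makes this a no-op
  return 0;
}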