!17987 fix repeated release device resource of actor runtime

Merge pull request !17987 from limingqi107/actor_runtime
This commit is contained in:
i-robot 2021-06-08 21:10:57 +08:00 committed by Gitee
commit 4932854776
3 changed files with 28 additions and 16 deletions

View File

@ -301,30 +301,38 @@ std::vector<KernelWithIndex> AnfRuntimeAlgorithm::GetAllOutputWithIndex(const An
std::vector<KernelWithIndex> ret;
std::vector<KernelWithIndex> ret_empty;
// The MakeTuple node need expand and recurse.
// The makeTuple node needs to be expanded and recursed into.
if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimMakeTuple)) {
auto make_tuple = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(make_tuple);
for (size_t i = 1; i < make_tuple->inputs().size(); i++) {
auto input_i_vector = GetAllOutputWithIndex(make_tuple->input(i));
(void)std::copy(input_i_vector.begin(), input_i_vector.end(), std::back_inserter(ret));
auto make_tuple_output = GetAllOutputWithIndex(make_tuple->input(i));
(void)std::copy(make_tuple_output.begin(), make_tuple_output.end(), std::back_inserter(ret));
}
return ret;
}
auto outputs_num = AnfAlgo::GetOutputTensorNum(node);
if (!IsRealCNodeKernel(node)) {
outputs_num = 1;
// A depend node needs to be resolved to its real input node.
if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimDepend)) {
auto depend_node = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(depend_node);
auto real_output = GetAllOutputWithIndex(depend_node->input(kRealInputIndexInDepend));
(void)std::copy(real_output.begin(), real_output.end(), std::back_inserter(ret));
return ret;
}
const std::vector<PrimitivePtr> return_types = {prim::kPrimDepend, prim::kPrimMakeTuple};
// The output may be a tuple, so all the outputs of the node need to be visited.
auto outputs_num = AnfAlgo::GetOutputTensorNum(node);
for (size_t i = 0; i < outputs_num; ++i) {
const auto &output_with_index = AnfAlgo::VisitKernelWithReturnType(node, i, false);
const auto &output_with_index = AnfAlgo::VisitKernelWithReturnType(node, i, false, return_types);
MS_EXCEPTION_IF_NULL(output_with_index.first);
// The MakeTuple node need recurse.
if (AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimMakeTuple)) {
auto input_vector = GetAllOutputWithIndex(output_with_index.first);
(void)std::copy(input_vector.begin(), input_vector.end(), std::back_inserter(ret));
// The depend and makeTuple nodes need to be recursed into.
if (AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimDepend) ||
AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimMakeTuple)) {
auto output_vector = GetAllOutputWithIndex(output_with_index.first);
(void)std::copy(output_vector.begin(), output_vector.end(), std::back_inserter(ret));
continue;
}

View File

@ -18,7 +18,6 @@
#include <vector>
#include <memory>
#include "runtime/device/gpu/gpu_device_manager.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "utils/log_adapter.h"
#include "utils/ms_context.h"
#include "runtime/device/gpu/gpu_memory_allocator.h"
@ -86,15 +85,15 @@ bool GPUDeviceAddress::SyncHostToDevice(const ShapeVector &, size_t size, TypeId
return SyncHostToDevice(size, host_ptr);
}
// PyNative mode needs to copy asynchronously to improve performance.
MS_EXCEPTION_IF_NULL(host_ptr);
bool need_sync = (size != 0) && (size_ != 0) && (size <= size_);
if (!need_sync) {
return true;
}
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kGPUDevice, device_id);
MS_EXCEPTION_IF_NULL(runtime_instance);
return runtime_instance->MemcpyAsync(ptr_, host_ptr, size, 0);
auto &stream = GPUDeviceManager::GetInstance().default_stream();
MS_EXCEPTION_IF_NULL(stream);
return GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(ptr_, host_ptr, size, stream);
}
void GPUDeviceAddress::ClearDeviceMemory() {

View File

@ -43,6 +43,11 @@ void GPUDeviceManager::InitDevice() {
}
void GPUDeviceManager::ReleaseDevice() {
// Avoid releasing the device resources more than once.
if (!dev_alive_) {
return;
}
for (CudaDeviceStream stream : gpu_streams_) {
if (stream != nullptr) {
CHECK_OP_RET_WITH_ERROR(CudaDriver::DestroyStream(stream), "Failed to destroy CUDA stream.");