diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index ac7eb84130f..cdbc0a49f6a 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -301,30 +301,38 @@ std::vector AnfRuntimeAlgorithm::GetAllOutputWithIndex(const An std::vector ret; std::vector ret_empty; - // The MakeTuple node need expand and recurse. + // The MakeTuple node needs to be expanded and recursed into. if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimMakeTuple)) { auto make_tuple = node->cast(); MS_EXCEPTION_IF_NULL(make_tuple); for (size_t i = 1; i < make_tuple->inputs().size(); i++) { - auto input_i_vector = GetAllOutputWithIndex(make_tuple->input(i)); - (void)std::copy(input_i_vector.begin(), input_i_vector.end(), std::back_inserter(ret)); + auto make_tuple_output = GetAllOutputWithIndex(make_tuple->input(i)); + (void)std::copy(make_tuple_output.begin(), make_tuple_output.end(), std::back_inserter(ret)); } return ret; } - auto outputs_num = AnfAlgo::GetOutputTensorNum(node); - if (!IsRealCNodeKernel(node)) { - outputs_num = 1; + // The Depend node needs to fetch its real input node. + if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimDepend)) { + auto depend_node = node->cast(); + MS_EXCEPTION_IF_NULL(depend_node); + auto real_output = GetAllOutputWithIndex(depend_node->input(kRealInputIndexInDepend)); + (void)std::copy(real_output.begin(), real_output.end(), std::back_inserter(ret)); + return ret; } + + const std::vector return_types = {prim::kPrimDepend, prim::kPrimMakeTuple}; // The output may be the tuple, so need visit all the outputs of node. 
+ auto outputs_num = AnfAlgo::GetOutputTensorNum(node); for (size_t i = 0; i < outputs_num; ++i) { - const auto &output_with_index = AnfAlgo::VisitKernelWithReturnType(node, i, false); + const auto &output_with_index = AnfAlgo::VisitKernelWithReturnType(node, i, false, return_types); MS_EXCEPTION_IF_NULL(output_with_index.first); - // The MakeTuple node need recurse. - if (AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimMakeTuple)) { - auto input_vector = GetAllOutputWithIndex(output_with_index.first); - (void)std::copy(input_vector.begin(), input_vector.end(), std::back_inserter(ret)); + // The Depend and MakeTuple nodes need to be recursed into. + if (AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimDepend) || + AnfAlgo::CheckPrimitiveType(output_with_index.first, prim::kPrimMakeTuple)) { + auto output_vector = GetAllOutputWithIndex(output_with_index.first); + (void)std::copy(output_vector.begin(), output_vector.end(), std::back_inserter(ret)); continue; } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index 86fcf62e550..5efdf9ca65a 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -18,7 +18,6 @@ #include #include #include "runtime/device/gpu/gpu_device_manager.h" -#include "runtime/device/kernel_runtime_manager.h" #include "utils/log_adapter.h" #include "utils/ms_context.h" #include "runtime/device/gpu/gpu_memory_allocator.h" @@ -86,15 +85,15 @@ bool GPUDeviceAddress::SyncHostToDevice(const ShapeVector &, size_t size, TypeId return SyncHostToDevice(size, host_ptr); } + // PyNative mode needs an asynchronous copy to improve performance. 
MS_EXCEPTION_IF_NULL(host_ptr); bool need_sync = (size != 0) && (size_ != 0) && (size <= size_); if (!need_sync) { return true; } - auto device_id = ms_context->get_param(MS_CTX_DEVICE_ID); - auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kGPUDevice, device_id); - MS_EXCEPTION_IF_NULL(runtime_instance); - return runtime_instance->MemcpyAsync(ptr_, host_ptr, size, 0); + auto &stream = GPUDeviceManager::GetInstance().default_stream(); + MS_EXCEPTION_IF_NULL(stream); + return GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(ptr_, host_ptr, size, stream); } void GPUDeviceAddress::ClearDeviceMemory() { diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc index f44740346ac..233dffe5b72 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_manager.cc @@ -43,6 +43,11 @@ void GPUDeviceManager::InitDevice() { } void GPUDeviceManager::ReleaseDevice() { + // Avoid releasing device resources repeatedly. + if (!dev_alive_) { + return; + } + for (CudaDeviceStream stream : gpu_streams_) { if (stream != nullptr) { CHECK_OP_RET_WITH_ERROR(CudaDriver::DestroyStream(stream), "Failed to destroy CUDA stream.");