From 059b05a72e275c402f179ffc9f542dd0ccf04818 Mon Sep 17 00:00:00 2001 From: chujinjin Date: Mon, 17 May 2021 15:23:41 +0800 Subject: [PATCH] fix memcopy async error --- .../runtime/device/ascend/ascend_device_address.cc | 2 +- .../runtime/device/ascend/ascend_kernel_runtime.cc | 11 +++++------ .../ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc | 8 +------- mindspore/ccsrc/runtime/device/kernel_runtime.h | 3 --- 4 files changed, 7 insertions(+), 17 deletions(-) diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index f4368970018..909ecb5d909 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -113,7 +113,7 @@ void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind) MS_EXCEPTION(DeviceProcessError) << "rtMemcpy failed"; } } else { - auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast(kind)); + auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast(RT_MEMCPY_HOST_TO_DEVICE_EX)); if (!ret) { MS_EXCEPTION(DeviceProcessError) << "MemcpyAsync failed"; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index 690638762f8..5b26fc06f53 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -703,7 +703,6 @@ bool AscendKernelRuntime::SyncStream() { MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; return false; } - FreeAndClearBufferPtrs(); return true; } @@ -714,11 +713,11 @@ bool AscendKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size, return false; } - std::shared_ptr buffer(new char[size]()); - MS_EXCEPTION_IF_NULL(buffer); - std::copy(reinterpret_cast(src), reinterpret_cast(src) + size, buffer.get()); - AddBufferPtr(buffer); - if (RT_ERROR_NONE != rtMemcpyAsync(dst, size, buffer.get(), size, static_cast(kind), stream_)) { + auto copy_kind = static_cast(kind); + if (copy_kind != RT_MEMCPY_HOST_TO_DEVICE_EX) { + MS_LOG(EXCEPTION) << "Memory copy async not support cache host buffer in kind: " << kind; + } + if (RT_ERROR_NONE != rtMemcpyAsync(dst, size, src, size, static_cast(kind), stream_)) { MS_LOG(ERROR) << "Call runtime rtMemcpyAsync error."; return false; } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index ab432c080e3..58c027a804c 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -65,7 +65,6 @@ bool GPUKernelRuntime::SyncStream() { MS_LOG(ERROR) << "Call SyncStream error."; return false; } - FreeAndClearBufferPtrs(); return true; } @@ -198,14 +197,9 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, } // namespace bool GPUKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) { - std::shared_ptr buffer(new char[size]()); - MS_EXCEPTION_IF_NULL(buffer); - std::copy(reinterpret_cast(src), reinterpret_cast(src) + size, buffer.get()); - AddBufferPtr(buffer); - auto &stream = GPUDeviceManager::GetInstance().default_stream(); MS_EXCEPTION_IF_NULL(stream); - auto ret = GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(dst, buffer.get(), size, stream); + auto ret = GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(dst, src, size, stream); if (!ret) { MS_LOG(ERROR) << "CopyHostMemToDeviceAsync failed"; return false; diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h index 562bc407b3d..de7cfb4be9c 100644 --- a/mindspore/ccsrc/runtime/device/kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h @@ -105,8 +105,6 @@ class KernelRuntime { virtual void PreInit() {} virtual uint64_t GetAvailableMemMaxSize() const { return 0; } - void AddBufferPtr(std::shared_ptr ptr) { buffer_ptrs_.push_back(ptr); } - void FreeAndClearBufferPtrs() { buffer_ptrs_.clear(); } void GenKernelEvents(const session::KernelGraph *graph); virtual std::shared_ptr CreateDeviceEvent() { return nullptr; } virtual DeviceAddressType GetTargetDeviceAddressType() const = 0; @@ -162,7 +160,6 @@ class KernelRuntime { std::map>>, std::vector>>>> graph_kernel_events_map_; - std::vector> buffer_ptrs_ = {}; }; using KernelRuntimePtr = std::shared_ptr; } // namespace device