forked from mindspore-Ecosystem/mindspore
!16492 fix memcopy async error
From: @chujinjin Reviewed-by: @zhoufeng54,@kisnwang Signed-off-by: @kisnwang
This commit is contained in:
commit
dcec68bcc7
|
@ -113,7 +113,7 @@ void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind)
|
|||
MS_EXCEPTION(DeviceProcessError) << "rtMemcpy failed";
|
||||
}
|
||||
} else {
|
||||
auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast<int32_t>(kind));
|
||||
auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast<int32_t>(RT_MEMCPY_HOST_TO_DEVICE_EX));
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(DeviceProcessError) << "MemcpyAsync failed";
|
||||
}
|
||||
|
|
|
@ -703,7 +703,6 @@ bool AscendKernelRuntime::SyncStream() {
|
|||
MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error.";
|
||||
return false;
|
||||
}
|
||||
FreeAndClearBufferPtrs();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -714,11 +713,11 @@ bool AscendKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size,
|
|||
return false;
|
||||
}
|
||||
|
||||
std::shared_ptr<char[]> buffer(new char[size]());
|
||||
MS_EXCEPTION_IF_NULL(buffer);
|
||||
std::copy(reinterpret_cast<const char *>(src), reinterpret_cast<const char *>(src) + size, buffer.get());
|
||||
AddBufferPtr(buffer);
|
||||
if (RT_ERROR_NONE != rtMemcpyAsync(dst, size, buffer.get(), size, static_cast<rtMemcpyKind_t>(kind), stream_)) {
|
||||
auto copy_kind = static_cast<rtMemcpyKind_t>(kind);
|
||||
if (copy_kind != RT_MEMCPY_HOST_TO_DEVICE_EX) {
|
||||
MS_LOG(EXCEPTION) << "Memory copy async not support cache host buffer in kind: " << kind;
|
||||
}
|
||||
if (RT_ERROR_NONE != rtMemcpyAsync(dst, size, src, size, static_cast<rtMemcpyKind_t>(kind), stream_)) {
|
||||
MS_LOG(ERROR) << "Call runtime rtMemcpyAsync error.";
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -65,7 +65,6 @@ bool GPUKernelRuntime::SyncStream() {
|
|||
MS_LOG(ERROR) << "Call SyncStream error.";
|
||||
return false;
|
||||
}
|
||||
FreeAndClearBufferPtrs();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -198,14 +197,9 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
|
|||
} // namespace
|
||||
|
||||
bool GPUKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) {
|
||||
std::shared_ptr<char[]> buffer(new char[size]());
|
||||
MS_EXCEPTION_IF_NULL(buffer);
|
||||
std::copy(reinterpret_cast<const char *>(src), reinterpret_cast<const char *>(src) + size, buffer.get());
|
||||
AddBufferPtr(buffer);
|
||||
|
||||
auto &stream = GPUDeviceManager::GetInstance().default_stream();
|
||||
MS_EXCEPTION_IF_NULL(stream);
|
||||
auto ret = GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(dst, buffer.get(), size, stream);
|
||||
auto ret = GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(dst, src, size, stream);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "CopyHostMemToDeviceAsync failed";
|
||||
return false;
|
||||
|
|
|
@ -105,8 +105,6 @@ class KernelRuntime {
|
|||
|
||||
virtual void PreInit() {}
|
||||
virtual uint64_t GetAvailableMemMaxSize() const { return 0; }
|
||||
void AddBufferPtr(std::shared_ptr<char[]> ptr) { buffer_ptrs_.push_back(ptr); }
|
||||
void FreeAndClearBufferPtrs() { buffer_ptrs_.clear(); }
|
||||
void GenKernelEvents(const session::KernelGraph *graph);
|
||||
virtual std::shared_ptr<DeviceEvent> CreateDeviceEvent() { return nullptr; }
|
||||
virtual DeviceAddressType GetTargetDeviceAddressType() const = 0;
|
||||
|
@ -162,7 +160,6 @@ class KernelRuntime {
|
|||
std::map<uint32_t,
|
||||
std::pair<std::vector<std::vector<std::function<void()>>>, std::vector<std::vector<std::function<void()>>>>>
|
||||
graph_kernel_events_map_;
|
||||
std::vector<std::shared_ptr<char[]>> buffer_ptrs_ = {};
|
||||
};
|
||||
using KernelRuntimePtr = std::shared_ptr<KernelRuntime>;
|
||||
} // namespace device
|
||||
|
|
Loading…
Reference in New Issue