!16492 fix memcopy async error

From: @chujinjin
Reviewed-by: @zhoufeng54,@kisnwang
Signed-off-by: @kisnwang
This commit is contained in:
mindspore-ci-bot 2021-05-18 14:34:21 +08:00 committed by Gitee
commit dcec68bcc7
4 changed files with 7 additions and 17 deletions

View File

@ -113,7 +113,7 @@ void SyncMemory(void *dst, const void *src, uint64_t size, rtMemcpyKind_t kind)
MS_EXCEPTION(DeviceProcessError) << "rtMemcpy failed";
}
} else {
auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast<int32_t>(kind));
auto ret = runtime_instance->MemcpyAsync(dst, src, size, static_cast<int32_t>(RT_MEMCPY_HOST_TO_DEVICE_EX));
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "MemcpyAsync failed";
}

View File

@ -703,7 +703,6 @@ bool AscendKernelRuntime::SyncStream() {
MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error.";
return false;
}
FreeAndClearBufferPtrs();
return true;
}
@ -714,11 +713,11 @@ bool AscendKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size,
return false;
}
std::shared_ptr<char[]> buffer(new char[size]());
MS_EXCEPTION_IF_NULL(buffer);
std::copy(reinterpret_cast<const char *>(src), reinterpret_cast<const char *>(src) + size, buffer.get());
AddBufferPtr(buffer);
if (RT_ERROR_NONE != rtMemcpyAsync(dst, size, buffer.get(), size, static_cast<rtMemcpyKind_t>(kind), stream_)) {
auto copy_kind = static_cast<rtMemcpyKind_t>(kind);
if (copy_kind != RT_MEMCPY_HOST_TO_DEVICE_EX) {
MS_LOG(EXCEPTION) << "Memory copy async not support cache host buffer in kind: " << kind;
}
if (RT_ERROR_NONE != rtMemcpyAsync(dst, size, src, size, static_cast<rtMemcpyKind_t>(kind), stream_)) {
MS_LOG(ERROR) << "Call runtime rtMemcpyAsync error.";
return false;
}

View File

@ -65,7 +65,6 @@ bool GPUKernelRuntime::SyncStream() {
MS_LOG(ERROR) << "Call SyncStream error.";
return false;
}
FreeAndClearBufferPtrs();
return true;
}
@ -198,14 +197,9 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
} // namespace
bool GPUKernelRuntime::MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) {
std::shared_ptr<char[]> buffer(new char[size]());
MS_EXCEPTION_IF_NULL(buffer);
std::copy(reinterpret_cast<const char *>(src), reinterpret_cast<const char *>(src) + size, buffer.get());
AddBufferPtr(buffer);
auto &stream = GPUDeviceManager::GetInstance().default_stream();
MS_EXCEPTION_IF_NULL(stream);
auto ret = GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(dst, buffer.get(), size, stream);
auto ret = GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(dst, src, size, stream);
if (!ret) {
MS_LOG(ERROR) << "CopyHostMemToDeviceAsync failed";
return false;

View File

@ -105,8 +105,6 @@ class KernelRuntime {
// Pre-initialization hook. The base implementation is intentionally empty;
// because it is virtual, a derived runtime may override it to do real work.
virtual void PreInit() {}
// Base implementation always returns 0; derived runtimes may override it to
// report an actual value.
// NOTE(review): the unit is presumably bytes -- confirm against the overrides.
virtual uint64_t GetAvailableMemMaxSize() const { return 0; }
// Appends `ptr` to buffer_ptrs_. The stored shared_ptr copy keeps the buffer
// alive at least until that entry is removed from buffer_ptrs_.
void AddBufferPtr(std::shared_ptr<char[]> ptr) { buffer_ptrs_.push_back(ptr); }
// Removes every entry from buffer_ptrs_, dropping the references held there.
// A buffer is actually freed only once no other shared_ptr still owns it.
void FreeAndClearBufferPtrs() { buffer_ptrs_.clear(); }
void GenKernelEvents(const session::KernelGraph *graph);
// Base implementation creates nothing and returns nullptr; because it is
// virtual, a derived runtime may override it to return a real DeviceEvent.
virtual std::shared_ptr<DeviceEvent> CreateDeviceEvent() { return nullptr; }
virtual DeviceAddressType GetTargetDeviceAddressType() const = 0;
@ -162,7 +160,6 @@ class KernelRuntime {
std::map<uint32_t,
std::pair<std::vector<std::vector<std::function<void()>>>, std::vector<std::vector<std::function<void()>>>>>
graph_kernel_events_map_;
std::vector<std::shared_ptr<char[]>> buffer_ptrs_ = {};
};
using KernelRuntimePtr = std::shared_ptr<KernelRuntime>;
} // namespace device