forked from mindspore-Ecosystem/mindspore
!1373 add memory copy module
Merge pull request !1373 from zyli2020/add_mem_swap_module_header
This commit is contained in: commit e82088d175
@@ -47,6 +47,8 @@ class GPUMemoryManager;
 namespace mindspore {
 namespace device {
+enum class DeviceAddressStatus { kInDevice, kInHost, kInDeviceToHost, kInHostToDevice };
+
 class DeviceAddress {
  public:
   explicit DeviceAddress(void *ptr, size_t size) : ptr_(ptr), size_(size) {}
@@ -60,6 +62,8 @@ class DeviceAddress {
   size_t GetSize() const { return size_; }
   std::string format() const { return format_; }
   TypeId type_id() const { return type_id_; }
+  virtual void set_status(DeviceAddressStatus status) {}
+  virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; }

  protected:
   const void *ptr() const { return ptr_; }
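The new DeviceAddressStatus enum gives every DeviceAddress a small location state machine: resident in device memory, resident in host memory, or in flight in either direction, and the virtual set_status/status hooks let backends that support swapping track it. A minimal sketch of how a caller might drive these hooks during a swap-out (the helper function and its completion handling are hypothetical, not part of this commit):

// Hypothetical caller-side sketch; assumes the DeviceAddress declarations above.
void SwapOutIfOnDevice(DeviceAddress *addr) {
  if (addr->status() != DeviceAddressStatus::kInDevice) {
    return;  // already on host, or a copy is still in flight
  }
  addr->set_status(DeviceAddressStatus::kInDeviceToHost);  // an async D2H copy has been issued
  // ... issue the copy; once its completion event fires:
  addr->set_status(DeviceAddressStatus::kInHost);
}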
@@ -15,9 +15,7 @@
 */
 #include "device/gpu/cuda_driver.h"
 #include <iostream>
 #include "utils/log_adapter.h"
 #include "utils/convert_utils.h"
@@ -54,6 +52,27 @@ bool CudaDriver::FreeDeviceMem(const DeviceMemPtr &addr) {
   return true;
 }

+size_t CudaDriver::AllocHostPinnedMem(size_t size, void **addr) {
+  if (size == 0) {
+    MS_LOG(EXCEPTION) << "The memory allocate size is 0";
+  }
+  auto ret = cudaHostAlloc(addr, size, cudaHostAllocDefault);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return 0;
+  }
+  return size;
+}
+
+void CudaDriver::FreeHostPinnedMem(void *addr) {
+  if (addr) {
+    auto ret = cudaFreeHost(addr);
+    if (ret != cudaSuccess) {
+      MS_LOG(EXCEPTION) << "cudaFreeHost failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    }
+  }
+}
+
 bool CudaDriver::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) {
   auto ret = cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
   if (ret != cudaSuccess) {
@@ -72,6 +91,25 @@ bool CudaDriver::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &
   return true;
 }

+bool CudaDriver::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size, DeviceStream stream) {
+  auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return false;
+  }
+  return true;
+}
+
+bool CudaDriver::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
+                                          DeviceStream stream) {
+  auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return false;
+  }
+  return true;
+}
+
 size_t CudaDriver::total_mem_size() {
   size_t free;
   size_t total;
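Taken together, the pinned-host and async-copy helpers wrap a standard CUDA pattern: page-locked host memory plus cudaMemcpyAsync on a dedicated stream, which is what lets the copy overlap with other work. A standalone sketch of that pattern written directly against the CUDA runtime API (illustration only, not part of this change):

#include <cuda_runtime.h>
#include <cstdio>

int main() {
  const size_t size = 1 << 20;
  void *host = nullptr;
  void *device = nullptr;
  cudaStream_t stream;
  if (cudaHostAlloc(&host, size, cudaHostAllocDefault) != cudaSuccess) return 1;  // pinned host buffer
  if (cudaMalloc(&device, size) != cudaSuccess) return 1;
  if (cudaStreamCreate(&stream) != cudaSuccess) return 1;
  // Async H2D copy; pinned host memory is what makes this truly asynchronous.
  cudaMemcpyAsync(device, host, size, cudaMemcpyHostToDevice, stream);
  cudaStreamSynchronize(stream);  // wait for the copy to finish
  cudaStreamDestroy(stream);
  cudaFree(device);
  cudaFreeHost(host);
  printf("async copy done\n");
  return 0;
}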
@@ -122,6 +160,55 @@ bool CudaDriver::SyncStream(const DeviceStream &stream) {
   return true;
 }

+bool CudaDriver::CreateEvent(DeviceEvent *event, unsigned int flag) {
+  auto ret = cudaEventCreateWithFlags(reinterpret_cast<cudaEvent_t *>(event), flag);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaEventCreateWithFlags failed, ret[" << static_cast<int>(ret) << "], "
+                  << cudaGetErrorString(ret);
+    return false;
+  }
+  return true;
+}
+
+bool CudaDriver::DestroyEvent(const DeviceEvent &event) {
+  auto ret = cudaEventDestroy((cudaEvent_t)event);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaEventDestroy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return false;
+  }
+  return true;
+}
+
+bool CudaDriver::RecordEvent(DeviceEvent event, DeviceStream stream) {
+  auto ret = cudaEventRecord((cudaEvent_t)event, (cudaStream_t)stream);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaEventRecord failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return false;
+  }
+  return true;
+}
+
+bool CudaDriver::SyncEvent(const DeviceEvent &event) {
+  auto ret = cudaEventSynchronize((cudaEvent_t)event);
+  if (ret != cudaSuccess) {
+    MS_LOG(ERROR) << "cudaEventSynchronize failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return false;
+  }
+  return true;
+}
+
+bool CudaDriver::QueryEvent(const DeviceEvent &event) {
+  auto ret = cudaEventQuery((cudaEvent_t)event);
+  if (ret == cudaSuccess) {
+    return true;
+  } else if (ret == cudaErrorNotReady) {
+    return false;
+  } else {
+    MS_LOG(ERROR) << "cudaEventQuery failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+    return false;
+  }
+}
+
 int CudaDriver::device_count() {
   int dev_count;
   auto ret = cudaGetDeviceCount(&dev_count);
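The event helpers exist so the swap machinery can mark a point in a stream right after an async copy and later ask, without blocking, whether that point has been reached; note that QueryEvent maps both cudaErrorNotReady and real errors to false, with only the latter logged. A standalone sketch of the record-then-poll pattern against the raw CUDA API (illustration only, not part of this change):

#include <cuda_runtime.h>

// Issue a device-to-host copy on `stream`, mark it with an event, and return
// the event so the caller can poll completion later (the swap-out pattern).
cudaEvent_t IssueTrackedD2HCopy(void *dst, const void *src, size_t size, cudaStream_t stream) {
  cudaEvent_t event;
  cudaEventCreateWithFlags(&event, cudaEventDisableTiming);  // timing disabled: lower overhead
  cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, stream);
  cudaEventRecord(event, stream);  // completes only after the copy completes
  return event;
}

// Non-blocking completion check: cudaErrorNotReady simply means "still running".
bool CopyFinished(cudaEvent_t event) { return cudaEventQuery(event) == cudaSuccess; }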
@@ -33,8 +33,16 @@ class CudaDriver {
   // such as malloc/free and memory copy from host to device and reverse.
   static size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr);
   static bool FreeDeviceMem(const DeviceMemPtr &addr);
+  static size_t AllocHostPinnedMem(size_t size, void **addr);
+  static void FreeHostPinnedMem(void *addr);
+
   static bool CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size);
   static bool CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size);
+
+  static bool CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size, DeviceStream stream = 0);
+  static bool CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
+                                       DeviceStream stream = 0);
+
   static size_t total_mem_size();
   static size_t free_mem_size();
@@ -44,6 +52,12 @@ class CudaDriver {
   static bool DestroyStream(const DeviceStream &stream);
   static bool SyncStream(const DeviceStream &stream);
+
+  static bool CreateEvent(DeviceEvent *event, unsigned int flag = cudaEventDefault);
+  static bool DestroyEvent(const DeviceEvent &event);
+  static bool RecordEvent(DeviceEvent event, DeviceStream stream = 0);
+  static bool SyncEvent(const DeviceEvent &event);
+  static bool QueryEvent(const DeviceEvent &event);
+
   // Encapsulate the cuda APIs associated with device management.
   static int device_count();
   static bool set_current_device(int index);
@@ -33,6 +33,11 @@ class GPUDeviceAddress : public DeviceAddress {
   bool SyncDeviceToHost(const std::vector<int> &shape, size_t size, TypeId type, void *host_ptr) const override;
   bool SyncHostToDevice(const std::vector<int> &shape, size_t size, TypeId type, const void *host_ptr) const override;
+  void set_status(DeviceAddressStatus status) { status_ = status; }
+  DeviceAddressStatus status() const { return status_; }
+
+ private:
+  DeviceAddressStatus status_{DeviceAddressStatus::kInDevice};
 };
 } // namespace gpu
 } // namespace device
@@ -0,0 +1,131 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "device/gpu/gpu_memory_copy_manager.h"
+#include "device/gpu/gpu_common.h"
+#include "device/gpu/gpu_device_manager.h"
+#include "session/anf_runtime_algorithm.h"
+
+namespace mindspore {
+namespace device {
+namespace gpu {
+void GPUMemCopyManager::Init() {
+  CHECK_OP_RET_WITH_EXCEPT(GPUDeviceManager::GetInstance().CreateStream(&swap_out_stream_),
+                           "Failed to create CUDA stream of memory swap out.");
+  CHECK_OP_RET_WITH_EXCEPT(GPUDeviceManager::GetInstance().CreateStream(&swap_in_stream_),
+                           "Failed to create CUDA stream of memory swap in.");
+}
+
+void GPUMemCopyManager::AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {
+  MS_EXCEPTION_IF_NULL(device_address);
+  MS_EXCEPTION_IF_NULL(host_addr.addr);
+  DeviceEvent event = nullptr;
+  CHECK_OP_RET_WITH_EXCEPT(CudaDriver::CreateEvent(&event, cudaEventDisableTiming), "Failed to create CUDA event.");
+  DeviceMemPtr device_ptr = const_cast<DeviceMemPtr>(device_address->GetPtr());
+  MS_EXCEPTION_IF_NULL(device_ptr);
+  device_address->set_status(DeviceAddressStatus::kInDeviceToHost);
+
+  CHECK_OP_RET_WITH_EXCEPT(
+    CudaDriver::CopyDeviceMemToHostAsync(host_addr.addr, device_ptr, host_addr.size, swap_out_stream_),
+    "Failed to copy device memory to host.");
+
+  CHECK_OP_RET_WITH_EXCEPT(CudaDriver::RecordEvent(event, swap_out_stream_),
+                           "Failed to record CUDA event to swap out stream.");
+  swap_out_queue_.emplace(device_address, event);
+}
+
+void GPUMemCopyManager::AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {
+  MS_EXCEPTION_IF_NULL(device_address);
+  MS_EXCEPTION_IF_NULL(host_addr.addr);
+  DeviceEvent event = nullptr;
+  CHECK_OP_RET_WITH_EXCEPT(CudaDriver::CreateEvent(&event, cudaEventDisableTiming), "Failed to create CUDA event.");
+  DeviceMemPtr device_ptr = const_cast<DeviceMemPtr>(device_address->GetPtr());
+  MS_EXCEPTION_IF_NULL(device_ptr);
+  device_address->set_status(DeviceAddressStatus::kInHostToDevice);
+
+  CHECK_OP_RET_WITH_EXCEPT(
+    CudaDriver::CopyHostMemToDeviceAsync(device_ptr, host_addr.addr, host_addr.size, swap_in_stream_),
+    "Failed to copy host memory to device.");
+  CHECK_OP_RET_WITH_EXCEPT(CudaDriver::RecordEvent(event, swap_in_stream_),
+                           "Failed to record CUDA event to swap in stream.");
+  swap_in_queue_.emplace(device_address, event);
+}
+
+bool GPUMemCopyManager::SyncMemCopyStream(SwapKind swap_kind) {
+  if (swap_kind == SwapKind::kDeviceToHost) {
+    return GPUDeviceManager::GetInstance().SyncStream(swap_out_stream_);
+  } else {
+    return GPUDeviceManager::GetInstance().SyncStream(swap_in_stream_);
+  }
+}
+
+DeviceAddressPtr GPUMemCopyManager::UpdateSwapOutQueue() {
+  if (swap_out_queue_.empty()) {
+    return nullptr;
+  }
+  auto &task = swap_out_queue_.front();
+  auto device_address = task.first;
+  auto &event = task.second;
+  bool finish_swap = CudaDriver::QueryEvent(event);
+  if (!finish_swap) {
+    return nullptr;
+  }
+  CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap out.");
+  swap_out_queue_.pop();
+  return device_address;
+}
+
+DeviceAddressPtr GPUMemCopyManager::UpdateSwapInQueue() {
+  if (swap_in_queue_.empty()) {
+    return nullptr;
+  }
+  auto &task = swap_in_queue_.front();
+  auto device_address = task.first;
+  auto &event = task.second;
+  bool finish_swap = CudaDriver::QueryEvent(event);
+  if (!finish_swap) {
+    return nullptr;
+  }
+  CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap in.");
+  swap_in_queue_.pop();
+  return device_address;
+}
+
+bool GPUMemCopyManager::AllocHostPinnedMem(size_t size, void **addr) {
+  auto alloc_size = CudaDriver::AllocHostPinnedMem(size, addr);
+  return alloc_size == size;
+}
+
+void GPUMemCopyManager::FreeHostPinnedMem(void *addr) { CudaDriver::FreeHostPinnedMem(addr); }
+
+void GPUMemCopyManager::ClearSwapQueue() {
+  CHECK_OP_RET_WITH_EXCEPT(SyncMemCopyStream(SwapKind::kDeviceToHost), "Failed to sync swap out stream");
+  CHECK_OP_RET_WITH_EXCEPT(SyncMemCopyStream(SwapKind::kHostToDevice), "Failed to sync swap in stream");
+
+  while (!swap_out_queue_.empty()) {
+    auto &event = swap_out_queue_.front().second;
+    CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap out.");
+    swap_out_queue_.pop();
+  }
+  while (!swap_in_queue_.empty()) {
+    auto &event = swap_in_queue_.front().second;
+    CHECK_OP_RET_WITH_EXCEPT(CudaDriver::DestroyEvent(event), "Failed to destroy CUDA event of swap in.");
+    swap_in_queue_.pop();
+  }
+}
+} // namespace gpu
+} // namespace device
+} // namespace mindspore
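A hypothetical caller of the manager above would enqueue a swap-out and then poll the queue until the recorded event has fired; the snippet below sketches that flow (the device_address/host_addr arguments and the final status update are assumptions for illustration, not code from this commit):

// Hypothetical usage sketch of GPUMemCopyManager; assumes the classes declared in this change.
void SwapOutExample(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {
  GPUMemCopyManager mem_copy_manager;
  mem_copy_manager.Init();                                        // creates the swap-out / swap-in CUDA streams
  mem_copy_manager.AddMemSwapOutTask(device_address, host_addr);  // async D2H copy + recorded event

  DeviceAddressPtr finished = nullptr;
  while ((finished = mem_copy_manager.UpdateSwapOutQueue()) == nullptr) {
    // event not fired yet; a real scheduler would do useful work here instead of spinning
  }
  finished->set_status(DeviceAddressStatus::kInHost);  // assumed follow-up: device copy can now be released
}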
@@ -0,0 +1,69 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_COPY_MANAGER_H_
+#define MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_COPY_MANAGER_H_
+
+#include <memory>
+#include <queue>
+#include <utility>
+#include "pre_activate/mem_reuse/mem_copy_manager.h"
+#include "device/device_address.h"
+#include "device/gpu/cuda_driver.h"
+#include "kernel/kernel.h"
+
+namespace mindspore {
+namespace device {
+namespace gpu {
+using mindspore::device::memswap::MemCopyManager;
+using mindspore::device::memswap::SwapKind;
+class GPUMemCopyManager : public MemCopyManager {
+ public:
+  GPUMemCopyManager() = default;
+
+  ~GPUMemCopyManager() override = default;
+
+  void Init() override;
+
+  void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) override;
+
+  void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) override;
+
+  bool SyncMemCopyStream(SwapKind swap_kind) override;
+
+  DeviceAddressPtr UpdateSwapOutQueue() override;
+
+  DeviceAddressPtr UpdateSwapInQueue() override;
+
+  bool AllocHostPinnedMem(size_t size, void **addr) override;
+
+  void FreeHostPinnedMem(void *addr) override;
+
+  void ClearSwapQueue() override;
+
+ private:
+  DeviceStream swap_out_stream_{nullptr};
+  DeviceStream swap_in_stream_{nullptr};
+  std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_out_queue_;
+  std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_in_queue_;
+};
+using GPUMemCopyManagerPtr = std::shared_ptr<GPUMemCopyManager>;
+} // namespace gpu
+} // namespace device
+} // namespace mindspore
+
+#endif // MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_COPY_MANAGER_H_
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_COPY_MANAGER_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_COPY_MANAGER_H_

 #include <vector>
 #include <map>
@@ -26,10 +26,6 @@
 #include "device/gpu/cuda_driver.h"
 #include "kernel/kernel.h"

-using mindspore::device::gpu::DeviceEvent;
-using mindspore::device::gpu::DeviceMemPtr;
-using mindspore::device::gpu::DeviceStream;
-using mindspore::device::gpu::HostMemPtr;
 using HostAddress = mindspore::kernel::Address;
 namespace mindspore {
 namespace device {
@@ -74,31 +70,29 @@ class MemCopyManager {
  public:
   MemCopyManager() = default;

-  ~MemCopyManager() = default;
+  virtual ~MemCopyManager() = default;

-  void Init();
+  virtual void Init() {}

-  void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr);
+  virtual void AddMemSwapOutTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {}

-  void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr);
+  virtual void AddMemSwapInTask(const DeviceAddressPtr &device_address, const HostAddress &host_addr) {}

-  bool SyncMemCopyStream(SwapKind swap_kind);
+  virtual bool SyncMemCopyStream(SwapKind swap_kind) { return true; }

-  DeviceAddressPtr UpdateSwapOutQueue();
+  virtual DeviceAddressPtr UpdateSwapOutQueue() { return nullptr; }

-  DeviceAddressPtr UpdateSwapInQueue();
+  virtual DeviceAddressPtr UpdateSwapInQueue() { return nullptr; }

-  void ClearSwapQueue();
-
- private:
-  DeviceStream swap_out_stream_{nullptr};
-  DeviceStream swap_in_stream_{nullptr};
-  std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_out_queue_;
-  std::queue<std::pair<DeviceAddressPtr, DeviceEvent>> swap_in_queue_;
+  virtual bool AllocHostPinnedMem(size_t size, void **addr) { return true; }
+
+  virtual void FreeHostPinnedMem(void *addr) {}
+
+  virtual void ClearSwapQueue() {}
 };
 using MemCopyManagerPtr = std::shared_ptr<MemCopyManager>;
 } // namespace memswap
 } // namespace device
 } // namespace mindspore

-#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_SWAP_UTIL_H_
+#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_MEM_REUSE_MEM_COPY_MANAGER_H_
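With every method turned into a virtual that has a no-op (or trivially true/null) default body, common code can hold a MemCopyManagerPtr and call it unconditionally, while only the GPU backend overrides the behaviour. A small sketch of that usage (the helper function below is hypothetical, not part of this commit):

// Hypothetical caller; assumes the MemCopyManager interface declared above.
void ReleaseSwapResources(const MemCopyManagerPtr &mgr) {
  if (mgr == nullptr) {
    return;
  }
  // Safe on every backend: the default implementation is an empty virtual,
  // while the GPU backend syncs its streams and destroys pending events.
  mgr->ClearSwapQueue();
}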
@@ -17,7 +17,9 @@
 #define TESTS_UT_STUB_RUNTIME_INCLUDE_CUDA_RUNTIME_API_H_

 #include <cstddef>
-typedef enum { cudaSuccess = 0 } cudaError_t;
+typedef enum { cudaSuccess = 0, cudaErrorNotReady = 1 } cudaError_t;
+
+unsigned int cudaEventDefault = 0;

 enum cudaMemcpyKind {
   cudaMemcpyHostToHost = 0,