From 26acb9f801032bdd28a61e59bd752b263f877e6c Mon Sep 17 00:00:00 2001 From: xiefangqi Date: Fri, 6 Nov 2020 09:45:49 +0800 Subject: [PATCH] minddata gpu mode add pin memory --- .../gpu/data/dataset_init_kernel.h | 2 +- .../dataset/engine/cache/CMakeLists.txt | 34 ++++++++++++------ .../engine/datasetops/device_queue_op.cc | 2 +- .../ccsrc/minddata/dataset/util/arena.cc | 30 ++++++++++++++++ mindspore/ccsrc/minddata/dataset/util/arena.h | 22 ++++++++++++ .../minddata/dataset/util/circular_pool.cc | 36 +++++++++++++++++++ .../minddata/dataset/util/circular_pool.h | 12 +++++++ 7 files changed, 126 insertions(+), 12 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h index 82b6463e888..f8cc9b19eaa 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h @@ -49,7 +49,7 @@ class DatasetInitKernel : public GpuKernel { std::vector workspace_size_list_; // The capacity of buffer Q. 
- size_t buffer_q_capacity_{1}; + size_t buffer_q_capacity_{2}; }; MS_REG_GPU_KERNEL(InitDataSetQueue, DatasetInitKernel) diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt index a0b4382dfd6..27bf9f477e4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt @@ -42,16 +42,30 @@ if (ENABLE_CACHE) storage_container.cc) add_executable(cache_server cache_main.cc) - target_link_libraries(cache_server - engine-cache-server - _c_dataengine - _c_mindrecord - mindspore::protobuf - mindspore::grpc++ - mindspore_gvar - ${PYTHON_LIBRARIES} - ${SECUREC_LIBRARY} - pthread) + if (ENABLE_GPU) + target_link_libraries(cache_server + engine-cache-server + _c_dataengine + _c_mindrecord + mindspore::protobuf + mindspore::grpc++ + mindspore_gvar + ${CUDNN_LIBRARY_PATH} + ${PYTHON_LIBRARIES} + ${SECUREC_LIBRARY} + pthread) + else() + target_link_libraries(cache_server + engine-cache-server + _c_dataengine + _c_mindrecord + mindspore::protobuf + mindspore::grpc++ + mindspore_gvar + ${PYTHON_LIBRARIES} + ${SECUREC_LIBRARY} + pthread) + endif() if (USE_GLOG) target_link_libraries(cache_server mindspore::glog) diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index 15f73bd55fb..dd3b5280022 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -91,7 +91,7 @@ Status DeviceQueueOp::operator()() { #endif } else if (device_type_ == DeviceType::GPU) { #ifdef ENABLE_GPUQUE - RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool_)); + RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool_, -1, 1024, false, true)); RETURN_IF_NOT_OK(SendDataToGPU()); #endif } else if (device_type_ == DeviceType::CPU) { diff --git 
a/mindspore/ccsrc/minddata/dataset/util/arena.cc b/mindspore/ccsrc/minddata/dataset/util/arena.cc
index de563cf1dbc..a4194b96327 100644
--- a/mindspore/ccsrc/minddata/dataset/util/arena.cc
+++ b/mindspore/ccsrc/minddata/dataset/util/arena.cc
@@ -235,14 +235,43 @@ std::ostream &operator<<(std::ostream &os, const ArenaImpl &s) {
 Status Arena::Init() {
   try {
     int64_t sz = size_in_MB_ * 1048576L;
+#ifdef ENABLE_GPUQUE
+    if (is_cuda_malloc_) {
+      // Page-locked (pinned) host memory; it must be released with cudaFreeHost(), not free().
+      auto ret = cudaHostAlloc(&ptr_, sz, cudaHostAllocDefault);
+      if (ret != cudaSuccess) {
+        MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+        ptr_ = nullptr;  // never leave a value the destructor could try to release
+        return Status(StatusCode::kOutOfMemory);
+      }
+    } else {
+      RETURN_IF_NOT_OK(DeMalloc(sz, &ptr_, false));
+    }
+#else
     RETURN_IF_NOT_OK(DeMalloc(sz, &ptr_, false));
+#endif
     impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
   } catch (std::bad_alloc &e) {
     return Status(StatusCode::kOutOfMemory);
   }
   return Status::OK();
 }
 
+#ifdef ENABLE_GPUQUE
+Arena::Arena(size_t val_in_MB, bool is_cuda_malloc)
+    : ptr_(nullptr), size_in_MB_(val_in_MB), is_cuda_malloc_(is_cuda_malloc) {}
+
+Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB, bool is_cuda_malloc) {
+  RETURN_UNEXPECTED_IF_NULL(p_ba);
+  auto ba = new (std::nothrow) Arena(val_in_MB, is_cuda_malloc);
+  if (ba == nullptr) {
+    return Status(StatusCode::kOutOfMemory);
+  }
+  (*p_ba).reset(ba);  // *p_ba owns the arena from here on, even if Init() fails below
+  RETURN_IF_NOT_OK(ba->Init());
+  return Status::OK();
+}
+#else
 Arena::Arena(size_t val_in_MB) : ptr_(nullptr), size_in_MB_(val_in_MB) {}
 
 Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB) {
@@ -255,5 +284,6 @@ Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB) {
   RETURN_IF_NOT_OK(ba->Init());
   return Status::OK();
 }
+#endif
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/util/arena.h b/mindspore/ccsrc/minddata/dataset/util/arena.h
index 8cf686a4c07..09d6467e78f
100644
--- a/mindspore/ccsrc/minddata/dataset/util/arena.h
+++ b/mindspore/ccsrc/minddata/dataset/util/arena.h
@@ -22,6 +22,9 @@
 #include "minddata/dataset/util/allocator.h"
 #include "minddata/dataset/util/memory_pool.h"
 #include "minddata/dataset/util/treap.h"
+#ifdef ENABLE_GPUQUE
+#include <cuda_runtime_api.h>
+#endif
 
 #define ARENA_LOG_BLK_SZ (6u)
 #define ARENA_BLK_SZ (static_cast(1u << ARENA_LOG_BLK_SZ))
@@ -105,10 +108,18 @@ class Arena : public MemoryPool {
   Arena(const Arena &) = delete;
   Arena &operator=(const Arena &) = delete;
   ~Arena() override {
+#ifdef ENABLE_GPUQUE
+    if (is_cuda_malloc_) {  // pinned buffer from cudaHostAlloc(): release via the CUDA runtime
+      if (ptr_ != nullptr) {
+        (void)cudaFreeHost(ptr_);
+      }
+      ptr_ = nullptr;  // makes the generic free() path below a no-op
+    }
+#endif
     if (ptr_ != nullptr) {
       free(ptr_);
     }
     ptr_ = nullptr;
   }
 
   /// As a derived class of MemoryPool, we have to implement the following.
@@ -140,16 +151,27 @@ class Arena : public MemoryPool {
     return os;
   }
 
+#ifdef ENABLE_GPUQUE
+  /// The only method to create an arena.
+  static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096, bool is_cuda_malloc = false);
+#else
   /// The only method to create an arena.
static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096);
+#endif
 
  protected:
   mutable std::mutex mux_;
   std::unique_ptr<ArenaImpl> impl_;
   void *ptr_;
   size_t size_in_MB_;
+#ifdef ENABLE_GPUQUE
+  bool is_cuda_malloc_;
+
+  explicit Arena(size_t val_in_MB = 4096, bool is_cuda_malloc = false);
+#else
 
   explicit Arena(size_t val_in_MB = 4096);
+#endif
 
   Status Init();
 };
diff --git a/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc b/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc
index 539c39cc77e..cc9fc9a9b8c 100644
--- a/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc
+++ b/mindspore/ccsrc/minddata/dataset/util/circular_pool.cc
@@ -27,7 +27,11 @@ namespace dataset {
 Status CircularPool::AddOneArena() {
   Status rc;
   std::shared_ptr<Arena> b;
+#ifdef ENABLE_GPUQUE
+  RETURN_IF_NOT_OK(Arena::CreateArena(&b, arena_size_, is_cuda_malloc_));
+#else
   RETURN_IF_NOT_OK(Arena::CreateArena(&b, arena_size_));
+#endif
   tail_ = b.get();
   cur_size_in_mb_ += arena_size_;
   mem_segments_.push_back(std::move(b));
@@ -194,12 +198,43 @@ int CircularPool::PercentFree() const {
   }
 }
 
+#ifdef ENABLE_GPUQUE
+CircularPool::CircularPool(int max_size_in_gb, int arena_size, bool is_cuda_malloc)
+    : unlimited_(max_size_in_gb <= 0),
+      max_size_in_mb_(unlimited_ ? std::numeric_limits<int>::max() : max_size_in_gb * 1024),
+      arena_size_(arena_size),
+      cur_size_in_mb_(0),
+      is_cuda_malloc_(is_cuda_malloc) {}  // last: declared after cur_size_in_mb_ in circular_pool.h
+#else
 CircularPool::CircularPool(int max_size_in_gb, int arena_size)
     : unlimited_(max_size_in_gb <= 0),
       max_size_in_mb_(unlimited_ ?
std::numeric_limits<int>::max() : max_size_in_gb * 1024),
       arena_size_(arena_size),
       cur_size_in_mb_(0) {}
+#endif
 
+#ifdef ENABLE_GPUQUE
+Status CircularPool::CreateCircularPool(std::shared_ptr<CircularPool> *out_pool, int max_size_in_gb, int arena_size,
+                                        bool createOneArena, bool is_cuda_malloc) {
+  Status rc;
+  if (out_pool == nullptr) {
+    RETURN_STATUS_UNEXPECTED("pPool is null");
+  }
+  auto pool = new (std::nothrow) CircularPool(max_size_in_gb, arena_size, is_cuda_malloc);
+  if (pool == nullptr) {
+    return Status(StatusCode::kOutOfMemory);
+  }
+  if (createOneArena) {
+    rc = pool->AddOneArena();
+  }
+  if (rc.IsOk()) {
+    (*out_pool).reset(pool);
+  } else {
+    delete pool;  // AddOneArena() failed; the pool was never published to *out_pool
+  }
+  return rc;
+}
+#else
 Status CircularPool::CreateCircularPool(std::shared_ptr<CircularPool> *out_pool, int max_size_in_gb, int arena_size,
                                         bool createOneArena) {
   Status rc;
@@ -220,6 +255,7 @@ Status CircularPool::CreateCircularPool(std::shared_ptr<CircularPool> *out_pool, i
   }
   return rc;
 }
+#endif
 
 CircularPool::~CircularPool() = default;
 }  // namespace dataset
diff --git a/mindspore/ccsrc/minddata/dataset/util/circular_pool.h b/mindspore/ccsrc/minddata/dataset/util/circular_pool.h
index 56e28aa6a20..d1f862f0e34 100644
--- a/mindspore/ccsrc/minddata/dataset/util/circular_pool.h
+++ b/mindspore/ccsrc/minddata/dataset/util/circular_pool.h
@@ -85,8 +85,13 @@ class CircularPool : public MemoryPool {
     return os;
   }
 
+#ifdef ENABLE_GPUQUE
+  static Status CreateCircularPool(std::shared_ptr<CircularPool> *out_pool, int max_size_in_gb = -1,
+                                   int arena_size = 4096, bool create_one_arena = false, bool is_cuda_malloc = false);
+#else
   static Status CreateCircularPool(std::shared_ptr<CircularPool> *out_pool, int max_size_in_gb = -1,
                                    int arena_size = 4096, bool create_one_arena = false);
+#endif
 
  private:
   ListOfArenas mem_segments_;
@@ -96,9 +101,16 @@ class CircularPool : public MemoryPool {
   int arena_size_;
   int cur_size_in_mb_;
   RWLock rw_lock_;
+#ifdef ENABLE_GPUQUE
+  bool is_cuda_malloc_;  // forwarded to Arena::CreateArena() by AddOneArena()
+
+  // We can take negative or 0 as input which means unlimited.
+  CircularPool(int max_size_in_gb, int arena_size, bool is_cuda_malloc);
+#else
 
   // We can take negative or 0 as input which means unlimited.
   CircularPool(int max_size_in_gb, int arena_size);
+#endif
 
   Status AddOneArena();
 };