forked from mindspore-Ecosystem/mindspore
fix gpu buffer reentry issue
This commit is contained in:
parent
8589335879
commit
cf829d8415
|
@ -442,10 +442,9 @@ Status DeviceQueueOp::PushDataToGPU() {
|
||||||
auto items = std::move(item.data_item);
|
auto items = std::move(item.data_item);
|
||||||
bool eoe_flag = item.eoe_flag;
|
bool eoe_flag = item.eoe_flag;
|
||||||
int64_t send_batch = 0;
|
int64_t send_batch = 0;
|
||||||
uint32_t handle = INVALID_HANDLE;
|
|
||||||
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2);
|
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2);
|
||||||
handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, {}, release_function);
|
auto ret = GpuBufferMgr::GetInstance().Open(channel_name_, {}, release_function);
|
||||||
if (handle == INVALID_HANDLE) {
|
if (ret != BlockQueueStatus_T::SUCCESS) {
|
||||||
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
|
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
|
||||||
"[Internal ERROR] Failed to open channel for sending data.");
|
"[Internal ERROR] Failed to open channel for sending data.");
|
||||||
}
|
}
|
||||||
|
@ -463,7 +462,7 @@ Status DeviceQueueOp::PushDataToGPU() {
|
||||||
return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__,
|
return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__,
|
||||||
"[Internal ERROR] Failed to prefetch data in current PS mode(cache data when sending).");
|
"[Internal ERROR] Failed to prefetch data in current PS mode(cache data when sending).");
|
||||||
}
|
}
|
||||||
RETURN_IF_NOT_OK(RetryPushData(handle, items, is_profiling_enable, &push_cost));
|
RETURN_IF_NOT_OK(RetryPushData(items, is_profiling_enable, &push_cost));
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
ProfilingRecorder(is_profiling_enable, profiling_node, send_batch, push_cost, &batch_start_time, &end_time,
|
ProfilingRecorder(is_profiling_enable, profiling_node, send_batch, push_cost, &batch_start_time, &end_time,
|
||||||
gpu_connector_->capacity(), gpu_connector_->size());
|
gpu_connector_->capacity(), gpu_connector_->size());
|
||||||
|
@ -491,7 +490,7 @@ Status DeviceQueueOp::PushDataToGPU() {
|
||||||
eoe_flag = item.eoe_flag;
|
eoe_flag = item.eoe_flag;
|
||||||
// If the batches send by dataset are more than gpu calculate, gpu will core for no signal notify.
|
// If the batches send by dataset are more than gpu calculate, gpu will core for no signal notify.
|
||||||
if (rc.IsError()) {
|
if (rc.IsError()) {
|
||||||
GpuBufferMgr::GetInstance().Close(handle);
|
GpuBufferMgr::GetInstance().Close(channel_name_);
|
||||||
GpuBufferMgr::GetInstance().CloseConfirm();
|
GpuBufferMgr::GetInstance().CloseConfirm();
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -507,13 +506,12 @@ Status DeviceQueueOp::PushDataToGPU() {
|
||||||
tree_->SetFinished();
|
tree_->SetFinished();
|
||||||
MS_LOG(INFO) << "ExecutionTree finished. Device queue pushed number of batches: " << send_batch;
|
MS_LOG(INFO) << "ExecutionTree finished. Device queue pushed number of batches: " << send_batch;
|
||||||
|
|
||||||
GpuBufferMgr::GetInstance().Close(handle);
|
GpuBufferMgr::GetInstance().Close(channel_name_);
|
||||||
GpuBufferMgr::GetInstance().CloseConfirm();
|
GpuBufferMgr::GetInstance().CloseConfirm();
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DeviceQueueOp::RetryPushData(unsigned int handle, const std::vector<DataItemGpu> &items, const bool profiling,
|
Status DeviceQueueOp::RetryPushData(const std::vector<DataItemGpu> &items, const bool profiling, uint64_t *push_time) {
|
||||||
uint64_t *push_time) {
|
|
||||||
bool flag_log = false;
|
bool flag_log = false;
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
uint64_t start_time = 0;
|
uint64_t start_time = 0;
|
||||||
|
@ -522,7 +520,7 @@ Status DeviceQueueOp::RetryPushData(unsigned int handle, const std::vector<DataI
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
while (!GpuBufferMgr::GetInstance().IsClosed() && !TaskManager::FindMe()->Interrupted()) {
|
while (!GpuBufferMgr::GetInstance().IsClosed() && !TaskManager::FindMe()->Interrupted()) {
|
||||||
BlockQueueStatus_T ret = GpuBufferMgr::GetInstance().Push(handle, items, WAIT_TIME);
|
BlockQueueStatus_T ret = GpuBufferMgr::GetInstance().Push(channel_name_, items, WAIT_TIME);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
if (ret == BlockQueueStatus_T::ERROR_INPUT) {
|
if (ret == BlockQueueStatus_T::ERROR_INPUT) {
|
||||||
return Status(
|
return Status(
|
||||||
|
@ -673,16 +671,16 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
|
||||||
Status DeviceQueueOp::ClearDevice() {
|
Status DeviceQueueOp::ClearDevice() {
|
||||||
MS_LOG(INFO) << "Clearing the data in GPU device: " << device_id_ << " channel: " << channel_name_;
|
MS_LOG(INFO) << "Clearing the data in GPU device: " << device_id_ << " channel: " << channel_name_;
|
||||||
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2);
|
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2);
|
||||||
auto handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, {}, release_function);
|
auto ret = GpuBufferMgr::GetInstance().Open(channel_name_, {}, release_function);
|
||||||
if (handle == INVALID_HANDLE) {
|
if (ret != BlockQueueStatus_T::SUCCESS) {
|
||||||
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
|
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
|
||||||
"[Internal ERROR] Failed to open channel for clearing the device.");
|
"[Internal ERROR] Failed to open channel for clearing the device.");
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockQueueStatus_T ret = GpuBufferMgr::GetInstance().Clear(handle);
|
ret = GpuBufferMgr::GetInstance().Clear(channel_name_);
|
||||||
CHECK_FAIL_RETURN_UNEXPECTED(!ret, "Failed to clear the device.");
|
CHECK_FAIL_RETURN_UNEXPECTED(!ret, "Failed to clear the device.");
|
||||||
|
|
||||||
GpuBufferMgr::GetInstance().Close(handle);
|
GpuBufferMgr::GetInstance().Close(channel_name_);
|
||||||
GpuBufferMgr::GetInstance().CloseConfirm();
|
GpuBufferMgr::GetInstance().CloseConfirm();
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
|
@ -145,7 +145,7 @@ class DeviceQueueOp : public PipelineOp {
|
||||||
#ifdef ENABLE_GPUQUE
|
#ifdef ENABLE_GPUQUE
|
||||||
Status SendDataToGPU();
|
Status SendDataToGPU();
|
||||||
Status MallocForGPUData(std::vector<device::DataItemGpu> *items, const TensorRow &curr_row, const int32_t &worker_id);
|
Status MallocForGPUData(std::vector<device::DataItemGpu> *items, const TensorRow &curr_row, const int32_t &worker_id);
|
||||||
Status RetryPushData(unsigned int handle, const std::vector<DataItemGpu> &data, bool profiling, uint64_t *push_time);
|
Status RetryPushData(const std::vector<DataItemGpu> &data, bool profiling, uint64_t *push_time);
|
||||||
void ReleaseData(void *addr, int32_t worker_id);
|
void ReleaseData(void *addr, int32_t worker_id);
|
||||||
Status LaunchParallelCopyThread();
|
Status LaunchParallelCopyThread();
|
||||||
Status PushDataToGPU();
|
Status PushDataToGPU();
|
||||||
|
|
|
@ -30,7 +30,7 @@
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };
|
enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_EXIST, QUEUE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };
|
||||||
|
|
||||||
struct DataItemGpu {
|
struct DataItemGpu {
|
||||||
int32_t worker_id_{0};
|
int32_t worker_id_{0};
|
||||||
|
|
|
@ -26,67 +26,51 @@ namespace py = pybind11;
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
static unsigned int AllocHandle() {
|
|
||||||
static std::atomic<unsigned int> handle(1);
|
|
||||||
return handle.fetch_add(1, std::memory_order_relaxed);
|
|
||||||
}
|
|
||||||
|
|
||||||
GpuBufferMgr &GpuBufferMgr::GetInstance() noexcept {
|
GpuBufferMgr &GpuBufferMgr::GetInstance() noexcept {
|
||||||
static GpuBufferMgr instance;
|
static GpuBufferMgr instance;
|
||||||
return instance;
|
return instance;
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockQueueStatus_T GpuBufferMgr::Create(unsigned int device_id, const std::string &channel_name, void *addr,
|
BlockQueueStatus_T GpuBufferMgr::Create(const std::string &channel_name, void *addr, const std::vector<size_t> &shape,
|
||||||
const std::vector<size_t> &shape, const size_t &capacity) {
|
const size_t &capacity) {
|
||||||
std::string name = std::to_string(device_id) + std::string("_") + channel_name;
|
MS_LOG(INFO) << "Gpu queue: " << channel_name << " created.";
|
||||||
if (name_queue_map_.count(name)) {
|
if (name_queue_map_.count(channel_name)) {
|
||||||
MS_LOG(ERROR) << "Queue already exist: " << name;
|
MS_LOG(ERROR) << "Queue already exist: " << channel_name;
|
||||||
return QUEUE_EXIST;
|
return QUEUE_EXIST;
|
||||||
}
|
}
|
||||||
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
|
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
|
||||||
BlockQueueStatus_T rt = queue->Create(addr, shape, capacity);
|
BlockQueueStatus_T rt = queue->Create(addr, shape, capacity);
|
||||||
if (rt != SUCCESS) {
|
if (rt != SUCCESS) {
|
||||||
|
MS_LOG(ERROR) << "Queue: " << channel_name << "create failed: " << rt;
|
||||||
return rt;
|
return rt;
|
||||||
}
|
}
|
||||||
(void)name_queue_map_.insert(std::make_pair(name, queue));
|
(void)name_queue_map_.insert(std::make_pair(channel_name, queue));
|
||||||
init_ = true;
|
init_ = true;
|
||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int GpuBufferMgr::Open(unsigned int device_id, const std::string &channel_name,
|
BlockQueueStatus_T GpuBufferMgr::Open(const std::string &channel_name, const std::vector<size_t> &shape,
|
||||||
const std::vector<size_t> &shape, const std::function<void(void *, int32_t)> func) {
|
const std::function<void(void *, int32_t)> func) {
|
||||||
|
MS_LOG(INFO) << "Gpu queue: " << channel_name << " open.";
|
||||||
set_device();
|
set_device();
|
||||||
std::string name = std::to_string(device_id) + std::string("_") + channel_name;
|
if (!name_queue_map_.count(channel_name)) {
|
||||||
if (!name_queue_map_.count(name)) {
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
MS_LOG(ERROR) << "Queue not exist " << name;
|
return QUEUE_NOT_EXIST;
|
||||||
return INVALID_HANDLE;
|
|
||||||
}
|
}
|
||||||
unsigned int handle = AllocHandle();
|
|
||||||
if (handle == INVALID_HANDLE) {
|
name_queue_map_[channel_name]->RegisterRelease(func);
|
||||||
MS_LOG(ERROR) << "handle is invalid";
|
|
||||||
return INVALID_HANDLE;
|
|
||||||
}
|
|
||||||
(void)handle_queue_map_.insert(std::make_pair(handle, name_queue_map_[name]));
|
|
||||||
name_queue_map_[name]->RegisterRelease(func);
|
|
||||||
open_by_dataset_++;
|
open_by_dataset_++;
|
||||||
return handle;
|
return SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int GpuBufferMgr::Open(unsigned int device_id, const std::string &channel_name,
|
BlockQueueStatus_T GpuBufferMgr::Open(const std::string &channel_name, const std::vector<size_t> &shape) {
|
||||||
const std::vector<size_t> &shape) {
|
MS_LOG(INFO) << "Gpu queue: " << channel_name << " open.";
|
||||||
set_device();
|
set_device();
|
||||||
std::string name = std::to_string(device_id) + std::string("_") + channel_name;
|
if (!name_queue_map_.count(channel_name)) {
|
||||||
if (!name_queue_map_.count(name)) {
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
MS_LOG(ERROR) << "Queue not exist " << name;
|
return QUEUE_NOT_EXIST;
|
||||||
return INVALID_HANDLE;
|
|
||||||
}
|
}
|
||||||
unsigned int handle = AllocHandle();
|
return SUCCESS;
|
||||||
if (handle == INVALID_HANDLE) {
|
|
||||||
MS_LOG(ERROR) << "handle is invalid";
|
|
||||||
return INVALID_HANDLE;
|
|
||||||
}
|
|
||||||
(void)handle_queue_map_.insert(std::make_pair(handle, name_queue_map_[name]));
|
|
||||||
return handle;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GpuBufferMgr::set_device_id(int device_id) { cur_dev_id_ = device_id; }
|
void GpuBufferMgr::set_device_id(int device_id) { cur_dev_id_ = device_id; }
|
||||||
|
@ -105,44 +89,48 @@ void GpuBufferMgr::set_device() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockQueueStatus_T GpuBufferMgr::Push(unsigned int handle, const std::vector<DataItemGpu> &data,
|
BlockQueueStatus_T GpuBufferMgr::Push(const std::string &channel_name, const std::vector<DataItemGpu> &data,
|
||||||
unsigned int timeout_in_sec) {
|
unsigned int timeout_in_sec) {
|
||||||
auto iter = handle_queue_map_.find(handle);
|
auto iter = name_queue_map_.find(channel_name);
|
||||||
if (iter == handle_queue_map_.end()) {
|
if (iter == name_queue_map_.end()) {
|
||||||
return HANDLE_NOT_EXIST;
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
|
return QUEUE_NOT_EXIST;
|
||||||
}
|
}
|
||||||
return iter->second->Push(data, timeout_in_sec);
|
return iter->second->Push(data, timeout_in_sec);
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockQueueStatus_T GpuBufferMgr::Front(unsigned int handle, std::vector<DataItemGpu> *data) {
|
BlockQueueStatus_T GpuBufferMgr::Front(const std::string &channel_name, std::vector<DataItemGpu> *data) {
|
||||||
auto iter = handle_queue_map_.find(handle);
|
auto iter = name_queue_map_.find(channel_name);
|
||||||
if (iter == handle_queue_map_.end()) {
|
if (iter == name_queue_map_.end()) {
|
||||||
return HANDLE_NOT_EXIST;
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
|
return QUEUE_NOT_EXIST;
|
||||||
}
|
}
|
||||||
|
|
||||||
return iter->second->Front(data);
|
return iter->second->Front(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockQueueStatus_T GpuBufferMgr::Pop(unsigned int handle) {
|
BlockQueueStatus_T GpuBufferMgr::Pop(const std::string &channel_name) {
|
||||||
auto iter = handle_queue_map_.find(handle);
|
auto iter = name_queue_map_.find(channel_name);
|
||||||
if (iter == handle_queue_map_.end()) {
|
if (iter == name_queue_map_.end()) {
|
||||||
return HANDLE_NOT_EXIST;
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
|
return QUEUE_NOT_EXIST;
|
||||||
}
|
}
|
||||||
|
|
||||||
return iter->second->Pop();
|
return iter->second->Pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockQueueStatus_T GpuBufferMgr::Clear(unsigned int handle) {
|
BlockQueueStatus_T GpuBufferMgr::Clear(const std::string &channel_name) {
|
||||||
auto iter = handle_queue_map_.find(handle);
|
auto iter = name_queue_map_.find(channel_name);
|
||||||
if (iter == handle_queue_map_.end()) {
|
if (iter == name_queue_map_.end()) {
|
||||||
return HANDLE_NOT_EXIST;
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
|
return QUEUE_NOT_EXIST;
|
||||||
}
|
}
|
||||||
|
|
||||||
return iter->second->Clear();
|
return iter->second->Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GpuBufferMgr::Close(unsigned int handle) noexcept {
|
void GpuBufferMgr::Close(const std::string &channel_name) noexcept {
|
||||||
if (!handle_queue_map_.count(handle)) {
|
MS_LOG(INFO) << "Close the queue: " << channel_name;
|
||||||
return;
|
|
||||||
}
|
|
||||||
(void)handle_queue_map_.erase(handle);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -151,6 +139,7 @@ bool GpuBufferMgr::IsInit() const { return init_; }
|
||||||
bool GpuBufferMgr::IsClosed() const { return closed_; }
|
bool GpuBufferMgr::IsClosed() const { return closed_; }
|
||||||
|
|
||||||
bool GpuBufferMgr::Destroy() {
|
bool GpuBufferMgr::Destroy() {
|
||||||
|
MS_LOG(INFO) << "Destroy all GPU queue.";
|
||||||
for (auto iter = name_queue_map_.begin(); iter != name_queue_map_.end(); ++iter) {
|
for (auto iter = name_queue_map_.begin(); iter != name_queue_map_.end(); ++iter) {
|
||||||
std::shared_ptr<BlockingQueue> queue = iter->second;
|
std::shared_ptr<BlockingQueue> queue = iter->second;
|
||||||
if (queue != nullptr) {
|
if (queue != nullptr) {
|
||||||
|
@ -164,9 +153,8 @@ bool GpuBufferMgr::Destroy() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool GpuBufferMgr::isCreated(unsigned int device_id, const std::string &channel_name) {
|
inline bool GpuBufferMgr::isCreated(const std::string &channel_name) {
|
||||||
std::string name = std::to_string(device_id) + std::string("_") + channel_name;
|
if (name_queue_map_.count(channel_name) != 0) {
|
||||||
if (name_queue_map_.count(name) != 0) {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -195,46 +183,20 @@ bool GpuBufferMgr::CloseNotify() {
|
||||||
|
|
||||||
void GpuBufferMgr::CloseConfirm() { sema.Signal(); }
|
void GpuBufferMgr::CloseConfirm() { sema.Signal(); }
|
||||||
|
|
||||||
size_t GpuBufferMgr::Size(unsigned int handle) {
|
size_t GpuBufferMgr::Size(const std::string &channel_name) {
|
||||||
if (handle == INVALID_HANDLE) {
|
if (!name_queue_map_.count(channel_name)) {
|
||||||
MS_LOG(ERROR) << "handle is invalid";
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (handle_queue_map_.count(handle) == 0) {
|
return name_queue_map_.at(channel_name)->Size();
|
||||||
MS_LOG(ERROR) << "Handle not exist " << handle;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return handle_queue_map_.at(handle)->Size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t GpuBufferMgr::Size(unsigned int device_id, const std::string &channel_name) {
|
size_t GpuBufferMgr::Capacity(const std::string &channel_name) {
|
||||||
std::string name = std::to_string(device_id) + std::string("_") + channel_name;
|
if (!name_queue_map_.count(channel_name)) {
|
||||||
if (!name_queue_map_.count(name)) {
|
MS_LOG(ERROR) << "Queue not exist " << channel_name;
|
||||||
MS_LOG(ERROR) << "Queue not exist " << name;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return name_queue_map_.at(name)->Size();
|
return name_queue_map_.at(channel_name)->Capacity();
|
||||||
}
|
|
||||||
|
|
||||||
size_t GpuBufferMgr::Capacity(unsigned int handle) {
|
|
||||||
if (handle == INVALID_HANDLE) {
|
|
||||||
MS_LOG(ERROR) << "handle is invalid";
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (handle_queue_map_.count(handle) == 0) {
|
|
||||||
MS_LOG(ERROR) << "Handle not exist " << handle;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return handle_queue_map_.at(handle)->Capacity();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t GpuBufferMgr::Capacity(unsigned int device_id, const std::string &channel_name) {
|
|
||||||
std::string name = std::to_string(device_id) + std::string("_") + channel_name;
|
|
||||||
if (!name_queue_map_.count(name)) {
|
|
||||||
MS_LOG(ERROR) << "Queue not exist " << name;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return name_queue_map_.at(name)->Capacity();
|
|
||||||
}
|
}
|
||||||
} // namespace device
|
} // namespace device
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -62,33 +62,31 @@ class Semaphore {
|
||||||
|
|
||||||
class GpuBufferMgr {
|
class GpuBufferMgr {
|
||||||
public:
|
public:
|
||||||
static const unsigned int INVALID_HANDLE = 0xffffffffUL;
|
|
||||||
|
|
||||||
EXPORT GpuBufferMgr() : cur_dev_id_(0), init_(false), closed_(false), open_by_dataset_(0) {}
|
EXPORT GpuBufferMgr() : cur_dev_id_(0), init_(false), closed_(false), open_by_dataset_(0) {}
|
||||||
|
|
||||||
EXPORT virtual ~GpuBufferMgr() = default;
|
EXPORT virtual ~GpuBufferMgr() = default;
|
||||||
|
|
||||||
EXPORT static GpuBufferMgr &GetInstance() noexcept;
|
EXPORT static GpuBufferMgr &GetInstance() noexcept;
|
||||||
|
|
||||||
EXPORT BlockQueueStatus_T Create(unsigned int device_id, const std::string &channel_name, void *addr,
|
EXPORT BlockQueueStatus_T Create(const std::string &channel_name, void *addr, const std::vector<size_t> &shape,
|
||||||
const std::vector<size_t> &shape, const size_t &capacity);
|
const size_t &capacity);
|
||||||
|
|
||||||
// call for Push thread
|
// call for Push thread
|
||||||
EXPORT unsigned int Open(unsigned int device_id, const std::string &channel_name, const std::vector<size_t> &shape,
|
EXPORT BlockQueueStatus_T Open(const std::string &channel_name, const std::vector<size_t> &shape,
|
||||||
std::function<void(void *, int32_t)> func);
|
std::function<void(void *, int32_t)> func);
|
||||||
|
|
||||||
// call for Front/Pop thread
|
// call for Front/Pop thread
|
||||||
EXPORT unsigned int Open(unsigned int device_id, const std::string &channel_name, const std::vector<size_t> &shape);
|
EXPORT BlockQueueStatus_T Open(const std::string &channel_name, const std::vector<size_t> &shape);
|
||||||
|
|
||||||
EXPORT BlockQueueStatus_T Push(unsigned int handle, const std::vector<DataItemGpu> &data,
|
EXPORT BlockQueueStatus_T Push(const std::string &channel_name, const std::vector<DataItemGpu> &data,
|
||||||
unsigned int timeout_in_sec);
|
unsigned int timeout_in_sec);
|
||||||
EXPORT BlockQueueStatus_T Front(unsigned int handle, std::vector<DataItemGpu> *data);
|
EXPORT BlockQueueStatus_T Front(const std::string &channel_name, std::vector<DataItemGpu> *data);
|
||||||
EXPORT BlockQueueStatus_T Pop(unsigned int handle);
|
EXPORT BlockQueueStatus_T Pop(const std::string &channel_name);
|
||||||
EXPORT BlockQueueStatus_T Clear(unsigned int handle);
|
EXPORT BlockQueueStatus_T Clear(const std::string &channel_name);
|
||||||
|
|
||||||
EXPORT void set_device_id(int device_id);
|
EXPORT void set_device_id(int device_id);
|
||||||
|
|
||||||
EXPORT void Close(unsigned int handle) noexcept;
|
EXPORT void Close(const std::string &channel_name) noexcept;
|
||||||
|
|
||||||
EXPORT bool IsInit() const;
|
EXPORT bool IsInit() const;
|
||||||
|
|
||||||
|
@ -102,13 +100,9 @@ class GpuBufferMgr {
|
||||||
// call for dataset send thread
|
// call for dataset send thread
|
||||||
EXPORT void CloseConfirm();
|
EXPORT void CloseConfirm();
|
||||||
|
|
||||||
EXPORT size_t Size(unsigned int handle);
|
EXPORT size_t Size(const std::string &channel_name);
|
||||||
|
|
||||||
EXPORT size_t Size(unsigned int device_id, const std::string &channel_name);
|
EXPORT size_t Capacity(const std::string &channel_name);
|
||||||
|
|
||||||
EXPORT size_t Capacity(unsigned int handle);
|
|
||||||
|
|
||||||
EXPORT size_t Capacity(unsigned int device_id, const std::string &channel_name);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void set_device() const;
|
void set_device() const;
|
||||||
|
@ -122,10 +116,9 @@ class GpuBufferMgr {
|
||||||
int open_by_dataset_;
|
int open_by_dataset_;
|
||||||
Semaphore sema;
|
Semaphore sema;
|
||||||
|
|
||||||
std::map<unsigned int, std::shared_ptr<BlockingQueue>> handle_queue_map_;
|
|
||||||
std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
|
std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
|
||||||
|
|
||||||
inline bool isCreated(unsigned int device_id, const std::string &channel_name);
|
inline bool isCreated(const std::string &channel_name);
|
||||||
|
|
||||||
GpuBufferMgr(const GpuBufferMgr &) = delete;
|
GpuBufferMgr(const GpuBufferMgr &) = delete;
|
||||||
GpuBufferMgr &operator=(const GpuBufferMgr &) = delete;
|
GpuBufferMgr &operator=(const GpuBufferMgr &) = delete;
|
||||||
|
|
|
@ -59,7 +59,7 @@ bool DatasetInitKernelMod::Launch(const std::vector<AddressPtr> &, const std::ve
|
||||||
<< len << "].";
|
<< len << "].";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto status = GpuBufferMgr::GetInstance().Create(0, queue_name_, addr, shapes_, buffer_q_capacity_);
|
auto status = GpuBufferMgr::GetInstance().Create(queue_name_, addr, shapes_, buffer_q_capacity_);
|
||||||
if (status) {
|
if (status) {
|
||||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init Dataset Failed. len: " << len << ", status:" << status;
|
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init Dataset Failed. len: " << len << ", status:" << status;
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,9 +37,9 @@ namespace kernel {
|
||||||
using mindspore::device::GpuBufferMgr;
|
using mindspore::device::GpuBufferMgr;
|
||||||
|
|
||||||
DatasetIteratorKernelMod::DatasetIteratorKernelMod()
|
DatasetIteratorKernelMod::DatasetIteratorKernelMod()
|
||||||
: handle_(GpuBufferMgr::INVALID_HANDLE), profiling_enable_(false), profiling_op_(nullptr) {}
|
: is_opened_(false), profiling_enable_(false), profiling_op_(nullptr) {}
|
||||||
|
|
||||||
DatasetIteratorKernelMod::~DatasetIteratorKernelMod() { GpuBufferMgr::GetInstance().Close(handle_); }
|
DatasetIteratorKernelMod::~DatasetIteratorKernelMod() { GpuBufferMgr::GetInstance().Close(queue_name_); }
|
||||||
|
|
||||||
bool DatasetIteratorKernelMod::Init(const CNodePtr &kernel_node) {
|
bool DatasetIteratorKernelMod::Init(const CNodePtr &kernel_node) {
|
||||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||||
|
@ -92,10 +92,10 @@ bool DatasetIteratorKernelMod::ReadDevice(std::vector<DataItemGpu> *data) {
|
||||||
profiling_enable_ = profiler_inst->GetEnableFlag();
|
profiling_enable_ = profiler_inst->GetEnableFlag();
|
||||||
if (profiling_enable_) {
|
if (profiling_enable_) {
|
||||||
start_time_stamp = profiling_op_->GetTimeStamp();
|
start_time_stamp = profiling_op_->GetTimeStamp();
|
||||||
queue_size = GpuBufferMgr::GetInstance().Size(handle_);
|
queue_size = GpuBufferMgr::GetInstance().Size(queue_name_);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
auto ret = GpuBufferMgr::GetInstance().Front(handle_, data);
|
auto ret = GpuBufferMgr::GetInstance().Front(queue_name_, data);
|
||||||
if (ret == device::SUCCESS) {
|
if (ret == device::SUCCESS) {
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
if (profiling_enable_) {
|
if (profiling_enable_) {
|
||||||
|
@ -115,7 +115,7 @@ bool DatasetIteratorKernelMod::ReadDevice(std::vector<DataItemGpu> *data) {
|
||||||
#ifdef ENABLE_DUMP_IR
|
#ifdef ENABLE_DUMP_IR
|
||||||
mindspore::RDR::TriggerAll();
|
mindspore::RDR::TriggerAll();
|
||||||
#endif
|
#endif
|
||||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', get data timeout";
|
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', get data timeout. Queue name: " << queue_name_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
|
@ -124,7 +124,8 @@ bool DatasetIteratorKernelMod::ReadDevice(std::vector<DataItemGpu> *data) {
|
||||||
profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
|
profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
MS_LOG(ERROR) << "For '" << kernel_name_ << "', get data failed, errcode " << ret;
|
MS_LOG(ERROR) << "For '" << kernel_name_ << "', get data failed, errcode " << ret
|
||||||
|
<< ", queue name: " << queue_name_;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -132,11 +133,12 @@ bool DatasetIteratorKernelMod::ReadDevice(std::vector<DataItemGpu> *data) {
|
||||||
|
|
||||||
bool DatasetIteratorKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
bool DatasetIteratorKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
||||||
const std::vector<AddressPtr> &outputs, void *stream) {
|
const std::vector<AddressPtr> &outputs, void *stream) {
|
||||||
if (handle_ == GpuBufferMgr::INVALID_HANDLE) {
|
if (!is_opened_) {
|
||||||
handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
|
auto ret = GpuBufferMgr::GetInstance().Open(queue_name_, output_size_list_);
|
||||||
if (handle_ == GpuBufferMgr::INVALID_HANDLE) {
|
if (ret != device::BlockQueueStatus_T::SUCCESS) {
|
||||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', gpu Queue(" << queue_name_ << ") Open Failed";
|
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', gpu Queue(" << queue_name_ << ") Open Failed: " << ret;
|
||||||
}
|
}
|
||||||
|
is_opened_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ReadDevice(&output_data_)) {
|
if (!ReadDevice(&output_data_)) {
|
||||||
|
@ -155,7 +157,7 @@ bool DatasetIteratorKernelMod::Launch(const std::vector<AddressPtr> &, const std
|
||||||
|
|
||||||
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)),
|
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)),
|
||||||
"cudaStreamSynchronize failed");
|
"cudaStreamSynchronize failed");
|
||||||
(void)GpuBufferMgr::GetInstance().Pop(handle_);
|
(void)GpuBufferMgr::GetInstance().Pop(queue_name_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ class DatasetIteratorKernelMod : public NativeGpuKernelMod {
|
||||||
private:
|
private:
|
||||||
bool ReadDevice(std::vector<DataItemGpu> *data);
|
bool ReadDevice(std::vector<DataItemGpu> *data);
|
||||||
std::string queue_name_;
|
std::string queue_name_;
|
||||||
unsigned int handle_;
|
bool is_opened_;
|
||||||
bool profiling_enable_;
|
bool profiling_enable_;
|
||||||
std::shared_ptr<GetNextProfiling> profiling_op_;
|
std::shared_ptr<GetNextProfiling> profiling_op_;
|
||||||
std::vector<TypeId> types_;
|
std::vector<TypeId> types_;
|
||||||
|
|
Loading…
Reference in New Issue