refactor data queue

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
zhoufeng 2022-08-11 10:33:47 +08:00
parent 4da277798e
commit e0c7fa6136
58 changed files with 1302 additions and 1698 deletions

View File

@ -81,7 +81,6 @@ if(ENABLE_D)
endif()
if(ENABLE_GPU)
set(ENABLE_GPUQUE ON)
add_compile_definitions(ENABLE_GPU_COLLECTIVE)
endif()

View File

@ -545,7 +545,7 @@ MindRTBackend::MindRTBackend(const std::string &backend_name, const std::string
root_graph_ = nullptr;
auto ms_context = MsContext::GetInstance();
const bool pynative_mode = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
auto &cut_list = pynative_mode ? compile::control_ops : GetMsNonlinearOps();
auto &cut_list = pynative_mode ? GetControlOps() : GetMsNonlinearOps();
graph_partition_ = std::make_shared<GraphPartition>(cut_list, backend_name);
graph_compiler_ = std::make_shared<GraphCompiler>();

View File

@ -40,11 +40,17 @@ using PrimTypePair = std::pair<PrimitivePtr, AbstractFunctionPtr>;
using MapPrimTypeFuncGraph = std::map<PrimTypePair, FuncGraphPtr>;
using TypedPrimitiveAbstractClosurePtr = std::shared_ptr<abstract::TypedPrimitiveAbstractClosure>;
std::vector<PrimitivePtr> nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch,
prim::kPrimMakeTuple, prim::kPrimBpropCut};
const std::vector<PrimitivePtr> &GetNonlinearOps() {
static std::vector<PrimitivePtr> nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch,
prim::kPrimMakeTuple, prim::kPrimBpropCut};
return nonlinear_ops;
}
std::vector<PrimitivePtr> control_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch, prim::kPrimMakeTuple,
prim::kPrimSwitchLayer};
const std::vector<PrimitivePtr> &GetControlOps() {
static std::vector<PrimitivePtr> control_ops = {prim::kPrimReturn, prim::kPrimPartial, prim::kPrimSwitch,
prim::kPrimMakeTuple, prim::kPrimSwitchLayer};
return control_ops;
}
const std::vector<PrimitivePtr> &GetMsNonlinearOps() {
static const std::vector<PrimitivePtr> ms_nonlinear_ops = {prim::kPrimReturn, prim::kPrimPartial,

View File

@ -43,8 +43,8 @@ extern const char kGeVm[];
// compile namespace
// A sub namespace in ME to support compile related definition.
namespace compile {
BACKEND_EXPORT extern std::vector<PrimitivePtr> nonlinear_ops;
BACKEND_EXPORT extern std::vector<PrimitivePtr> control_ops;
BACKEND_EXPORT const std::vector<PrimitivePtr> &GetNonlinearOps();
BACKEND_EXPORT const std::vector<PrimitivePtr> &GetControlOps();
BACKEND_EXPORT const std::vector<PrimitivePtr> &GetMsNonlinearOps();
FuncGraphPtr WrapPrimitives(const FuncGraphPtr &graph);
using VmEvalFunc = std::function<BaseRef(const VectorRef &)>;
@ -52,7 +52,7 @@ using VmEvalFuncPtr = std::shared_ptr<std::function<BaseRef(const VectorRef &)>>
class BACKEND_EXPORT CompileGraph {
public:
explicit CompileGraph(const BackendPtr &backend, const std::vector<PrimitivePtr> &cut_list = nonlinear_ops);
explicit CompileGraph(const BackendPtr &backend, const std::vector<PrimitivePtr> &cut_list = GetNonlinearOps());
virtual ~CompileGraph() = default;
@ -113,7 +113,7 @@ using CompileGraphPtr = std::shared_ptr<CompileGraph>;
// CompileGraphs is used to Convert a graph cluster into instruction lists.
class BACKEND_EXPORT CompileGraphs {
public:
explicit CompileGraphs(const BackendPtr &backend, const std::vector<PrimitivePtr> &cut_list = nonlinear_ops);
explicit CompileGraphs(const BackendPtr &backend, const std::vector<PrimitivePtr> &cut_list = GetNonlinearOps());
virtual ~CompileGraphs() = default;
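
Editor's note on the change above: the exported global vectors (nonlinear_ops, control_ops) are replaced by accessor functions returning a function-local static. A minimal standalone sketch of that pattern, assuming the motivation is avoiding static initialization order problems for globals exported across libraries (the names below are illustrative, not from this commit):

// Sketch only: accessor returning a function-local static.
#include <string>
#include <vector>

const std::vector<std::string> &GetOps() {
  // Constructed on first call and thread-safe since C++11, so callers never
  // observe an uninitialized object regardless of static initialization order.
  static const std::vector<std::string> ops = {"Return", "Partial", "Switch"};
  return ops;
}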

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_BLOCKING_QUEUE_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_BLOCKING_QUEUE_H_
#ifndef MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_BLOCKING_QUEUE_H
#define MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_BLOCKING_QUEUE_H
#include <unistd.h>
#include <iostream>
@ -25,7 +25,7 @@
#include <vector>
#include <condition_variable>
#include <functional>
#include "runtime/data_queue/data_queue.h"
#include "include/backend/data_queue/data_queue.h"
namespace mindspore {
namespace device {
class BlockingQueue {
@ -33,15 +33,16 @@ class BlockingQueue {
BlockingQueue() : queue_(nullptr) {}
~BlockingQueue() = default;
BlockQueueStatus_T Create(const std::shared_ptr<DataQueue> &data_queue);
DataQueueStatus Create(const std::shared_ptr<DataQueue> &data_queue);
void RegisterRelease(const std::function<void(void *, int32_t)> &func);
BlockQueueStatus_T Push(const std::vector<DataQueueItem> &data, unsigned int timeout_in_sec);
BlockQueueStatus_T Front(std::vector<DataQueueItem> *data);
BlockQueueStatus_T Pop();
BlockQueueStatus_T Clear();
DataQueueStatus Push(const std::vector<DataQueueItem> &data, unsigned int timeout_in_sec);
DataQueueStatus Front(std::vector<DataQueueItem> *data);
DataQueueStatus Pop();
DataQueueStatus Clear();
bool Destroy();
size_t Size() { return queue_->Size(); }
size_t Capacity() { return queue_->Capacity(); }
const std::shared_ptr<DataQueue> &Queue() const { return queue_; }
private:
std::mutex mutex_;
@ -51,5 +52,4 @@ class BlockingQueue {
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_BLOCKING_QUEUE_H_
#endif // MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_BLOCKING_QUEUE_H
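
For orientation, a hedged usage sketch of the renamed BlockingQueue API, based only on the declarations above; device_queue is assumed to be a concrete DataQueue supplied by a backend, and the timeout value is illustrative:

// Hypothetical sketch, not part of this commit.
device::BlockingQueue queue;
if (queue.Create(device_queue) != device::DataQueueStatus::SUCCESS) {
  // handle the error
}
queue.RegisterRelease([](void *addr, int32_t worker_id) { /* free the host buffer */ });

std::vector<device::DataQueueItem> batch;        // filled by the sender thread
(void)queue.Push(batch, /*timeout_in_sec=*/30);  // producer side

std::vector<device::DataQueueItem> out;          // consumer side
if (queue.Front(&out) == device::DataQueueStatus::SUCCESS) {
  (void)queue.Pop();
}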

View File

@ -14,19 +14,30 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_DATA_QUEUE_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_DATA_QUEUE_H_
#ifndef MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_DATA_QUEUE_H
#define MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_DATA_QUEUE_H
#include <unistd.h>
#include <string>
#include <memory>
#include <vector>
#include <functional>
#include "runtime/hardware/device_context_manager.h"
#include "mindspore/core/utils/data_queue_handler.h"
namespace mindspore {
namespace device {
class DeviceContext;
enum class DataQueueStatus : int { SUCCESS = 0, QUEUE_EXIST, QUEUE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };
struct DataQueueItem {
int32_t worker_id_{0};
std::string data_type_;
size_t data_len_{0};
void *data_ptr_{nullptr};
std::vector<int64_t> shapes_;
void *device_addr_{nullptr};
};
class DataQueue {
public:
explicit DataQueue(const size_t capacity);
@ -34,16 +45,21 @@ class DataQueue {
virtual void RegisterRelease(const std::function<void(void *, int32_t)> &func) { host_release_ = func; }
virtual bool IsOpen() const { return true; }
virtual bool IsEmpty() const { return size_ == 0; }
virtual bool IsFull() const { return size_ == capacity_; }
virtual BlockQueueStatus_T Push(std::vector<DataQueueItem> data) = 0;
virtual BlockQueueStatus_T Front(std::vector<DataQueueItem> *data) const = 0;
virtual BlockQueueStatus_T Pop() = 0;
virtual DataQueueStatus Push(std::vector<DataQueueItem> data) = 0;
virtual DataQueueStatus Front(std::vector<DataQueueItem> *data) const = 0;
virtual DataQueueStatus Pop() = 0;
virtual bool Destroy() = 0;
virtual void SetThreadDevice() = 0;
virtual size_t Size() { return size_; }
virtual size_t Capacity() { return capacity_; }
virtual std::shared_ptr<void> AllocHostMem(size_t size) { return std::shared_ptr<void>(::malloc(size), ::free); }
protected:
size_t head_;
size_t tail_;
@ -59,5 +75,4 @@ class DataQueue {
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_BLOCKING_QUEUE_H_
#endif // MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_DATA_QUEUE_H
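
A hedged sketch of filling the new DataQueueItem for one host tensor; the buffer pointer and shape below are hypothetical:

// Hypothetical sketch: describe a float32 tensor of shape [32, 224, 224, 3].
device::DataQueueItem item;
item.data_type_ = "float32";                           // element type as a string
item.shapes_ = {32, 224, 224, 3};                      // tensor shape
item.data_len_ = 32 * 224 * 224 * 3 * sizeof(float);   // payload size in bytes
item.data_ptr_ = host_buffer;                          // host address of the payload (assumed to exist)
// device_addr_ is filled in later by the backend once device memory is allocated.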

View File

@ -0,0 +1,156 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_DATA_QUEUE_MGR_H
#define MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_DATA_QUEUE_MGR_H
#include <unistd.h>
#include <iostream>
#include <functional>
#include <map>
#include <vector>
#include <string>
#include <memory>
#include <mutex>
#include <condition_variable>
#include "include/backend/visible.h"
#include "include/backend/data_queue/data_queue.h"
#ifndef BUILD_LITE
#include "ir/anf.h"
#endif
namespace mindspore {
namespace device {
constexpr unsigned int MAX_WAIT_TIME_IN_SEC = 60;
class BlockingQueue;
// channel_name, dynamic_shape, capacity, addr, shape
using DataQueueCreator =
std::function<std::shared_ptr<DataQueue>(const std::string &, bool, size_t, void *, const std::vector<size_t> &)>;
class Semaphore {
public:
explicit Semaphore(int count = 0) : count_(count) {}
inline void Signal() {
std::unique_lock<std::mutex> lock(mutex_);
++count_;
cv_.notify_one();
}
inline bool Wait() {
std::unique_lock<std::mutex> lock(mutex_);
while (count_ == 0) {
if (cv_.wait_for(lock, std::chrono::seconds(MAX_WAIT_TIME_IN_SEC)) == std::cv_status::timeout) {
return false;
}
}
--count_;
return true;
}
private:
std::mutex mutex_;
std::condition_variable cv_;
int count_;
};
class BACKEND_EXPORT DataQueueMgr {
public:
DataQueueMgr() : init_(false), closed_(false), open_by_dataset_(0) {}
virtual ~DataQueueMgr() = default;
static DataQueueMgr &GetInstance() noexcept;
void RegisterDataQueueCreator(const std::string &device_name, DataQueueCreator &&creator);
std::shared_ptr<DataQueue> CreateDataQueue(const std::string &device_name, const std::string &channel_name,
bool dynamic_shape, size_t capacity, void *addr,
const std::vector<size_t> &shape);
DataQueueStatus Create(const std::string &channel_name, void *addr, const std::vector<size_t> &shape,
const size_t &capacity);
// called from the Push thread
DataQueueStatus Open(const std::string &channel_name, std::function<void(void *, int32_t)> func);
// called from the Front/Pop thread
DataQueueStatus Open(const std::string &channel_name) const;
DataQueueStatus Push(const std::string &channel_name, const std::vector<DataQueueItem> &data,
unsigned int timeout_in_sec);
DataQueueStatus Front(const std::string &channel_name, std::vector<DataQueueItem> *data);
DataQueueStatus Pop(const std::string &channel_name);
DataQueueStatus Clear(const std::string &channel_name);
DataQueueStatus OpenDynamicBufQueue(const std::string &channel_name, const std::function<void(void *, int32_t)> func);
DataQueueStatus CreateDynamicBufQueue(const std::string &channel_name, const size_t &capacity);
DataQueueStatus OpenDynamicBufQueue(const std::string &channel_name);
std::shared_ptr<DataQueue> GetDataQueue(const std::string &channel_name) const;
DataQueueStatus SetThreadDevice(const std::string &device_name);
std::shared_ptr<void> AllocHostMem(const std::string &device_name, size_t size);
void Close(const std::string &channel_name) const noexcept;
bool IsInit() const;
bool IsClosed() const;
bool Destroy();
// called to release GPU resources
bool CloseNotify();
// called from the dataset send thread
void CloseConfirm();
size_t Size(const std::string &channel_name);
size_t Capacity(const std::string &channel_name);
private:
inline bool isCreated(const std::string &channel_name) const;
DataQueueMgr(const DataQueueMgr &) = delete;
DataQueueMgr &operator=(const DataQueueMgr &) = delete;
bool init_;
bool closed_;
std::mutex mutex_;
std::mutex close_mutex_;
std::condition_variable cv_;
// number of queues opened by the dataset
int open_by_dataset_;
Semaphore sema;
bool dynamic_shape_{false};
size_t default_capacity_{2};
std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
std::map<std::string, DataQueueCreator> data_queue_creator_map_ = {}; // key: device name, value: DataQueueCreator
};
#ifndef BUILD_LITE
bool PopDataFromDataQueue(const AnfNodePtr &data_kernel);
#endif
#define REGISTER_DATA_QUEUE_CREATOR(device_name, creator) \
struct device_name##DataQueueCreatorClass { \
device_name##DataQueueCreatorClass() { \
DataQueueMgr::GetInstance().RegisterDataQueueCreator(device_name, creator); \
} \
} g_##device_name##_data_queue_creator;
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_INCLUDE_BACKEND_DATA_QUEUE_DATA_QUEUE_MGR_H
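
A hedged sketch of how a backend could plug into the new manager via REGISTER_DATA_QUEUE_CREATOR, and how the dataset side might drive it; MyDevice, MyDataQueue, CreateMyDataQueue and "channel_0" are illustrative names, not part of this commit:

// Hypothetical registration in a backend translation unit.
std::shared_ptr<DataQueue> CreateMyDataQueue(const std::string &channel_name, bool dynamic_shape,
                                             size_t capacity, void *addr, const std::vector<size_t> &shape) {
  return std::make_shared<MyDataQueue>(channel_name, capacity);  // MyDataQueue derives from DataQueue
}
constexpr char MyDevice[] = "MyDevice";
REGISTER_DATA_QUEUE_CREATOR(MyDevice, CreateMyDataQueue);

// Hypothetical dataset side: open the channel, push one batch, close on shutdown.
auto &mgr = DataQueueMgr::GetInstance();
if (mgr.Open("channel_0", [](void *addr, int32_t worker_id) { /* release host memory */ }) ==
    DataQueueStatus::SUCCESS) {
  (void)mgr.Push("channel_0", /*data=*/{}, /*timeout_in_sec=*/MAX_WAIT_TIME_IN_SEC);
  mgr.Close("channel_0");
}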

View File

@ -24,10 +24,6 @@ if(ENABLE_ACL)
add_definitions(-D ENABLE_ACL)
message(STATUS "ACL module is enabled")
endif()
if(ENABLE_GPUQUE)
add_definitions(-D ENABLE_GPUQUE)
message(STATUS "GPU queue is enabled")
endif()
if(ENABLE_TDTQUE)
add_definitions(-D ENABLE_TDTQUE)
message(STATUS "TDT queue is enabled")
@ -119,9 +115,6 @@ endif()
if(NOT ENABLE_SECURITY)
add_dependencies(engine-perf core)
endif()
if(ENABLE_TDTQUE)
add_dependencies(engine-device-queue core)
endif()
# for proto/cache_grpc.pb.h dependency
if(ENABLE_CACHE)
@ -204,11 +197,6 @@ if(ENABLE_TDTQUE)
if(NOT ENABLE_SECURITY)
set(dataengine_submodules ${dataengine_submodules} $<TARGET_OBJECTS:engine-perf>)
endif()
set(dataengine_submodules ${dataengine_submodules} $<TARGET_OBJECTS:engine-device-queue>)
# tdt impl
set(dataengine_submodules ${dataengine_submodules} $<TARGET_OBJECTS:engine-tdt>)
# host queue impl
set(dataengine_submodules ${dataengine_submodules} $<TARGET_OBJECTS:engine-host-queue>)
else()
if(NOT ENABLE_SECURITY)
set(dataengine_submodules ${dataengine_submodules} $<TARGET_OBJECTS:engine-perf>)
@ -251,16 +239,7 @@ target_link_libraries(_c_dataengine PUBLIC mindspore::jpeg_turbo mindspore::turb
mindspore::opencv_imgcodecs mindspore::opencv_imgproc mindspore::tinyxml2
mindspore::sentencepiece_train ${ICU_LIB})
if(ENABLE_GPUQUE)
target_link_libraries(_c_dataengine PRIVATE
${CUDNN_LIBRARY_PATH}
${CUDA_PATH}/lib64/libcudart.so
${CUDA_PATH}/lib64/stubs/libcuda.so)
endif()
if(ENABLE_TDTQUE)
target_link_libraries(_c_dataengine PRIVATE ${ACL} ${ACL_TDT_CHANNEL})
endif()
target_link_libraries(_c_dataengine PRIVATE mindspore_backend)
add_dependencies(_c_dataengine _c_mindrecord)
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")

View File

@ -10,10 +10,6 @@ endif()
add_subdirectory(cache)
if(ENABLE_TDTQUE)
add_subdirectory(device_queue_impl)
endif()
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
set(SRC_FILES_LIST
@ -67,7 +63,3 @@ if(NOT ENABLE_SECURITY)
add_dependencies(engine-perf engine-cache-client)
endif()
endif()
if(ENABLE_TDTQUE)
add_dependencies(engine engine-device-queue)
endif()

View File

@ -35,7 +35,9 @@
#include "minddata/mindrecord/include/shard_header.h"
#include "minddata/mindrecord/include/shard_writer.h"
#endif
#ifdef WITH_BACKEND
#include "utils/ms_context.h"
#endif
namespace mindspore {
namespace dataset {
#ifndef ENABLE_SECURITY
@ -355,18 +357,19 @@ Status TreeConsumer::Reset(int64_t step) {
#endif
RETURN_IF_NOT_OK(this->Terminate());
}
#ifdef ENABLE_GPUQUE
// clear the device if GPU is used.
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");
DeviceQueueOp *op = dynamic_cast<DeviceQueueOp *>(root.get());
if (op != nullptr) {
MS_LOG(INFO) << "Clearing the GPU device";
RETURN_IF_NOT_OK(op->ClearDevice());
#ifdef WITH_BACKEND
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
if (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) {
// clear the device if GPU is used.
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");
DeviceQueueOp *op = dynamic_cast<DeviceQueueOp *>(root.get());
if (op != nullptr) {
MS_LOG(INFO) << "Clearing the GPU device";
RETURN_IF_NOT_OK(op->ClearDevice());
}
}
#endif
tree_adapter_ = std::make_unique<TreeAdapter>(TreeAdapter::UsageFlag::kDeReset);
RETURN_IF_NOT_OK(tree_adapter_->Compile(old_root, num_epochs_, step));
RETURN_IF_NOT_OK(tree_adapter_->Launch());

View File

@ -21,12 +21,33 @@
#include <memory>
#include <unordered_map>
#include "minddata/dataset/engine/gpu_item_connector.h"
#include "minddata/dataset/engine/dataset_iterator.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/task_manager.h"
#ifdef WITH_BACKEND
#include "mindspore/ccsrc/include/backend/data_queue/data_queue_mgr.h"
#endif
#include "mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.h"
#ifdef WITH_BACKEND
#include "utils/ms_context.h"
#endif
namespace mindspore {
namespace dataset {
namespace {
std::vector<DataQueueItem> ConvertTensorRowToDataQueueItem(const TensorRow &row) {
std::vector<device::DataQueueItem> items;
for (auto &i : row) {
device::DataQueueItem data_item;
data_item.data_len_ = static_cast<size_t>(i->SizeInBytes());
data_item.shapes_ = i->shape().AsVector();
data_item.data_ptr_ = const_cast<void *>(static_cast<const void *>(i->GetBuffer()));
data_item.data_type_ = i->type().ToString();
items.emplace_back(std::move(data_item));
}
return items;
}
} // namespace
DeviceQueueOp::DeviceQueueOp(const std::string channel_name, DeviceType device_type, int32_t device_id,
bool send_epoch_end, int32_t total_batch, bool create_data_info_queue)
: PipelineOp(1),
@ -40,37 +61,23 @@ DeviceQueueOp::DeviceQueueOp(const std::string channel_name, DeviceType device_t
create_data_info_queue_(create_data_info_queue),
data_info_queue_ptr_(nullptr),
first_fetch_flag_(false),
first_push_flag_(false)
#ifdef ENABLE_TDTQUE
,
ascend_keep_waiting_(true),
tdtInstancePtr(std::make_shared<TdtPlugin>(channel_name_, device_id_))
#endif
{
first_push_flag_(false),
ascend_keep_waiting_(true) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
dynamic_shape_ = cfg->dynamic_shape();
#ifdef ENABLE_GPUQUE
// Get the total device num of current machine
int32_t device_count = 0;
cudaGetDeviceCount(&device_count);
rank_id_ = cfg->rank_id(); // Get the current rank_id
if (device_count > 0) {
rank_id_ = rank_id_ % device_count;
}
// Be careful when modifying num_workers_ and queue_capacity_:
// we suggest keeping num_workers_ * queue_capacity_ no greater than 16, because
// each worker owns one circular_pool with 1G of pinned memory, so num_workers_ * queue_capacity_
// must be limited to avoid memory overload
num_workers_ = kDeviceQueGpuNumThreads;
queue_capacity_ = kDeviceQueGpuQueueCapacity;
#endif
#ifdef ENABLE_TDTQUE
ascend_keep_waiting_ = true;
if (kIsHeterogeneous) {
tdtInstancePtr = std::make_shared<HostQueueImpl>(channel_name_, device_id_);
} else {
tdtInstancePtr = std::make_shared<TdtPlugin>(channel_name_, device_id_);
#ifdef WITH_BACKEND
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
if (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice && !dynamic_shape_) {
ascend_data_queue_ =
device::DataQueueMgr::GetInstance().CreateDataQueue(kAscendDevice, channel_name, dynamic_shape_, 0, nullptr, {});
}
#endif
#ifdef ENABLE_DUMP_IR
@ -87,13 +94,11 @@ DeviceQueueOp::~DeviceQueueOp() {
#endif
}
#ifdef ENABLE_GPUQUE
void DeviceQueueOp::ReleaseData(void *addr, int32_t worker_id) {
if (addr != nullptr && worker_id >= 0 && worker_id < pool_.size()) {
pool_[worker_id]->Deallocate(addr);
}
}
#endif
Status DeviceQueueOp::EoeReceived(int32_t worker_id) {
state_ = OpState::kDeOpIdle;
@ -150,7 +155,7 @@ Status DeviceQueueOp::operator()() {
}
#endif
if (device_type_ == DeviceType::Ascend) {
#ifdef ENABLE_TDTQUE
#ifdef WITH_BACKEND
if (create_data_info_queue_) {
// This place has a race condition with GetDataInfo, so the first one
// to arrive here will do the initialization work.
@ -162,10 +167,6 @@ Status DeviceQueueOp::operator()() {
}
}
}
if (!kIsHeterogeneous && tdtInstancePtr->acl_handle_ == nullptr) {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Create channel for sending data failed, please check DEVICE ID setting.");
}
if (dynamic_shape_) {
RETURN_IF_NOT_OK(SendDataToAscendDynamic());
@ -174,7 +175,7 @@ Status DeviceQueueOp::operator()() {
}
#endif
} else if (device_type_ == DeviceType::GPU) {
#ifdef ENABLE_GPUQUE
#ifdef WITH_BACKEND
RETURN_IF_NOT_OK(SendDataToGPU());
#endif
} else if (device_type_ == DeviceType::CPU) {
@ -184,7 +185,6 @@ Status DeviceQueueOp::operator()() {
return Status::OK();
}
#ifdef ENABLE_TDTQUE
Status DeviceQueueOp::SendDataToAscend() {
MS_LOG(INFO) << "Device queue, sending data to Ascend.";
#ifndef ENABLE_SECURITY
@ -242,7 +242,7 @@ Status DeviceQueueOp::SendDataToAscend() {
#endif
PrintBeginInfoWhenFirstBatch(first_push_flag_);
// when training has stopped, the handle might already have been destroyed
if (!kIsHeterogeneous && tdtInstancePtr->acl_handle_ == nullptr) {
if (ascend_data_queue_ != nullptr && !ascend_data_queue_->IsOpen()) {
MS_LOG(WARNING) << "Thread has already been terminated.";
is_break_loop = true;
continue;
@ -306,9 +306,22 @@ Status DeviceQueueOp::SendEpochEndToAscend(const TensorRow &curr_row, const bool
int32_t *tdt_cost, bool *is_break_loop) {
RETURN_UNEXPECTED_IF_NULL(tdt_cost);
RETURN_UNEXPECTED_IF_NULL(is_break_loop);
if (curr_row.eoe() && send_epoch_end_ && tdtInstancePtr->acl_handle_ != nullptr) {
if (curr_row.eoe() && send_epoch_end_ && ascend_data_queue_->IsOpen()) {
TensorRow dummy_row;
auto status = tdtInstancePtr->hostPush(dummy_row, is_profiling_enable, tdt_cost, ACL_TENSOR_DATA_END_OF_SEQUENCE);
#ifndef ENABLE_SECURITY
double start_time = 0;
if (is_profiling_enable) {
start_time = ProfilingTime::GetCurMilliSecond();
}
#endif
auto status = ascend_data_queue_->Push({});
#ifndef ENABLE_SECURITY
if (is_profiling_enable) {
double end_time = ProfilingTime::GetCurMilliSecond();
// using MS_EXCEPTION_IF_NULL here causes a compile error
*tdt_cost = static_cast<int32_t>(end_time - start_time);
}
#endif
RETURN_IF_NOT_OK(CheckPushStatus(status, stop_send_, &send_finished_, is_break_loop));
MS_LOG(INFO) << "an epoch has already sent, now stop send data.";
@ -337,8 +350,22 @@ void DeviceQueueOp::LimitSendingBatches(int64_t send_batch, int64_t *sending_num
}
Status DeviceQueueOp::SendRowToTdt(TensorRow curr_row, bool is_profiling_enable, int32_t *tdt_cost) {
auto status = tdtInstancePtr->hostPush(curr_row, is_profiling_enable, tdt_cost);
if (status != Status::OK()) {
std::vector<device::DataQueueItem> items = ConvertTensorRowToDataQueueItem(curr_row);
#ifndef ENABLE_SECURITY
double start_time = 0;
if (is_profiling_enable) {
start_time = ProfilingTime::GetCurMilliSecond();
}
#endif
auto status = ascend_data_queue_->Push(items);
#ifndef ENABLE_SECURITY
if (is_profiling_enable) {
double end_time = ProfilingTime::GetCurMilliSecond();
// using MS_EXCEPTION_IF_NULL here causes a compile error
*tdt_cost = static_cast<int32_t>(end_time - start_time);
}
#endif
if (status != device::DataQueueStatus::SUCCESS) {
if (stop_send_) {
MS_LOG(INFO) << "stop_send received";
return Status::OK();
@ -358,15 +385,16 @@ Status DeviceQueueOp::SendRowToTdt(TensorRow curr_row, bool is_profiling_enable,
return Status::OK();
}
Status DeviceQueueOp::CheckPushStatus(Status status, bool stop_send, bool *send_finished, bool *is_break_loop) {
if (status != Status::OK()) {
Status DeviceQueueOp::CheckPushStatus(DataQueueStatus status, bool stop_send, bool *send_finished,
bool *is_break_loop) {
if (status != DataQueueStatus::SUCCESS) {
if (stop_send) {
*send_finished = true;
MS_LOG(INFO) << "stop_send received";
return Status::OK();
}
// when training has stopped, the handle might already have been destroyed
if (tdtInstancePtr->acl_handle_ == nullptr) {
if (!ascend_data_queue_->IsOpen()) {
*is_break_loop = true;
MS_LOG(WARNING) << "Thread has already been terminated.";
return Status::OK();
@ -379,10 +407,14 @@ Status DeviceQueueOp::CheckPushStatus(Status status, bool stop_send, bool *send_
}
return Status::OK();
}
#endif
#ifdef ENABLE_TDTQUE
Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
#ifdef WITH_BACKEND
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
if (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) != kAscendDevice) {
RETURN_STATUS_UNEXPECTED("'GetDataInfo' only supported on Ascend.");
}
if (!create_data_info_queue_) {
RETURN_STATUS_UNEXPECTED("[Internal ERROR] DataInfo queue is not created.");
}
@ -396,38 +428,29 @@ Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
}
}
RETURN_IF_NOT_OK(data_info_queue_ptr_->PopFront(data_info));
#endif
return Status::OK();
}
#else
Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
RETURN_STATUS_UNEXPECTED("'GetDataInfo' only supported on Ascend.");
}
#endif
#ifdef ENABLE_GPUQUE
Status DeviceQueueOp::SetThreadDevice() {
// Without cudaSetDevice, CUDA memory is allocated on GPU:0 by default
// and will be overloaded in the distributed scenario.
auto ret = cudaSetDevice(rank_id_);
if (ret != cudaSuccess) {
std::string err;
err += "cudaSetDevice failed, ret[";
err += std::to_string(static_cast<int>(ret));
err += "], ";
err += cudaGetErrorString(ret);
RETURN_STATUS_UNEXPECTED(err);
}
#ifdef WITH_BACKEND
(void)device::DataQueueMgr::GetInstance().SetThreadDevice("GPU");
#endif
return Status::OK();
}
Status DeviceQueueOp::LaunchParallelCopyThread() {
#ifdef WITH_BACKEND
RETURN_UNEXPECTED_IF_NULL(tree_);
// Every thread that uses the CUDA API should call SetThreadDevice
RETURN_IF_NOT_OK(SetThreadDevice());
// CircularPool may not be safe in a multi-thread scenario, so each worker gets its own pool
for (int i = 0; i < num_workers_; i++) {
std::shared_ptr<MemoryPool> pool;
RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool, -1, kDeviceQueGpuThreadMemory, false, true));
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(
&pool, -1, kDeviceQueGpuThreadMemory, false,
MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice));
pool_.push_back(pool);
}
gpu_connector_ = std::make_unique<GpuConnector>(num_workers_, 1, queue_capacity_);
@ -437,15 +460,20 @@ Status DeviceQueueOp::LaunchParallelCopyThread() {
tree_->LaunchWorkers(num_workers_, std::bind(&DeviceQueueOp::WorkerEntry, this, std::placeholders::_1), "", id()));
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Push data to GPU queue",
std::bind(&DeviceQueueOp::PushDataToGPU, this), nullptr, id()));
#endif
return Status::OK();
}
bool DeviceQueueOp::NoExceptionRaised() {
return !TaskManager::FindMe()->Interrupted() && !mindspore::DataQueueHandler::IsClosed();
#ifdef WITH_BACKEND
return !TaskManager::FindMe()->Interrupted() && !device::DataQueueMgr::GetInstance().IsClosed();
#else
return !TaskManager::FindMe()->Interrupted();
#endif
}
Status DeviceQueueOp::PushDataToGPU() {
#ifdef WITH_BACKEND
RETURN_UNEXPECTED_IF_NULL(tree_);
// Every thread use cuda api should SetThreadDevice
RETURN_IF_NOT_OK(SetThreadDevice());
@ -473,17 +501,17 @@ Status DeviceQueueOp::PushDataToGPU() {
bool eoe_flag = item.eoe_flag;
int64_t send_batch = 0;
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2);
BlockQueueStatus_T ret;
DataQueueStatus ret;
if (dynamic_shape_) {
ret = mindspore::DataQueueHandler::OpenDynamicBufQueue(channel_name_, release_function);
ret = device::DataQueueMgr::GetInstance().OpenDynamicBufQueue(channel_name_, release_function);
} else {
ret = mindspore::DataQueueHandler::Open(channel_name_, release_function);
ret = device::DataQueueMgr::GetInstance().Open(channel_name_, release_function);
}
if (ret != BlockQueueStatus_T::SUCCESS) {
if (ret != DataQueueStatus::SUCCESS) {
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to open channel for sending data.");
}
while (!(items.empty() && !eoe_flag) && !mindspore::DataQueueHandler::IsClosed()) {
while (!(items.empty() && !eoe_flag) && !device::DataQueueMgr::GetInstance().IsClosed()) {
if (!eoe_flag) {
#ifdef ENABLE_DUMP_IR
md_channel_info_->RecordBatchQueue(gpu_connector_->size());
@ -524,8 +552,8 @@ Status DeviceQueueOp::PushDataToGPU() {
eoe_flag = item.eoe_flag;
// If the dataset sends more batches than the GPU consumes, the GPU will core dump because no signal is notified.
if (rc.IsError()) {
mindspore::DataQueueHandler::Close(channel_name_);
mindspore::DataQueueHandler::CloseConfirm();
device::DataQueueMgr::GetInstance().Close(channel_name_);
device::DataQueueMgr::GetInstance().CloseConfirm();
return rc;
}
} else {
@ -540,8 +568,8 @@ Status DeviceQueueOp::PushDataToGPU() {
tree_->SetFinished();
MS_LOG(INFO) << "ExecutionTree finished. Device queue pushed number of batches: " << send_batch;
mindspore::DataQueueHandler::Close(channel_name_);
mindspore::DataQueueHandler::CloseConfirm();
device::DataQueueMgr::GetInstance().Close(channel_name_);
device::DataQueueMgr::GetInstance().CloseConfirm();
return Status::OK();
}
@ -553,7 +581,7 @@ Status DeviceQueueOp::WorkerEntry(int32_t worker_id) {
TensorRow current_row;
uint32_t batch_num = 0;
RETURN_IF_NOT_OK(receive_queues_[worker_id]->PopFront(&current_row));
while (!current_row.quit() && !mindspore::DataQueueHandler::IsClosed()) {
while (!current_row.quit() && !device::DataQueueMgr::GetInstance().IsClosed()) {
GpuConnectorItem connector_item = {{}, current_row.eoe()};
if (!connector_item.eoe_flag) {
std::vector<device::DataQueueItem> items;
@ -565,6 +593,7 @@ Status DeviceQueueOp::WorkerEntry(int32_t worker_id) {
data_item.worker_id_ = worker_id;
items.push_back(data_item);
}
RETURN_IF_NOT_OK(MallocForGPUData(&items, current_row, worker_id));
connector_item.data_item = std::move(items);
batch_num++;
@ -579,10 +608,12 @@ Status DeviceQueueOp::WorkerEntry(int32_t worker_id) {
// Add empty data_item vector with eoe_flag=false as quit flag.
GpuConnectorItem connector_item = {{}, false};
RETURN_IF_NOT_OK(gpu_connector_->Add(worker_id, std::move(connector_item)));
#endif
return Status::OK();
}
Status DeviceQueueOp::SendDataToGPU() {
#ifdef WITH_BACKEND
RETURN_IF_NOT_OK(LaunchParallelCopyThread());
MS_LOG(INFO) << "Device queue, sending data to GPU.";
#ifndef ENABLE_SECURITY
@ -594,10 +625,8 @@ Status DeviceQueueOp::SendDataToGPU() {
first_fetch_flag_ = true;
int64_t num_buf = 0;
bool is_break_loop = false;
uint32_t batch_num = 0;
while (!current_row.eof() && !is_break_loop && !mindspore::DataQueueHandler::IsClosed()) {
while (!current_row.eoe() && !is_break_loop && !mindspore::DataQueueHandler::IsClosed()) {
batch_num++;
while (!current_row.eof() && !is_break_loop && !device::DataQueueMgr::GetInstance().IsClosed()) {
while (!current_row.eoe() && !is_break_loop && !device::DataQueueMgr::GetInstance().IsClosed()) {
RETURN_IF_NOT_OK(FilterMetadata(&current_row));
RETURN_IF_NOT_OK(CheckExceptions(current_row));
#ifndef ENABLE_SECURITY
@ -636,6 +665,7 @@ Status DeviceQueueOp::SendDataToGPU() {
}
MS_LOG(INFO) << "Device queue received number of batches and EOEs: " << (num_buf - num_workers_);
#endif
return Status::OK();
}
@ -664,23 +694,23 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataQueueItem> *items
}
Status DeviceQueueOp::ClearDevice() {
#ifdef WITH_BACKEND
MS_LOG(INFO) << "Clearing the data in GPU device: " << device_id_ << " channel: " << channel_name_;
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2);
auto ret = mindspore::DataQueueHandler::Open(channel_name_, release_function);
if (ret != BlockQueueStatus_T::SUCCESS) {
auto ret = device::DataQueueMgr::GetInstance().Open(channel_name_, release_function);
if (ret != DataQueueStatus::SUCCESS) {
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to open channel for clearing the device.");
}
ret = mindspore::DataQueueHandler::Clear(channel_name_);
CHECK_FAIL_RETURN_UNEXPECTED(!ret, "Failed to clear the device.");
ret = device::DataQueueMgr::GetInstance().Clear(channel_name_);
CHECK_FAIL_RETURN_UNEXPECTED(ret == DataQueueStatus::SUCCESS, "Failed to clear the device.");
mindspore::DataQueueHandler::Close(channel_name_);
mindspore::DataQueueHandler::CloseConfirm();
device::DataQueueMgr::GetInstance().Close(channel_name_);
device::DataQueueMgr::GetInstance().CloseConfirm();
#endif
return Status::OK();
}
#endif
Status DeviceQueueOp::SendDataToCPU() {
MS_LOG(INFO) << "Device queue, sending data to CPU.";
int64_t total_batch = 0;
@ -794,6 +824,7 @@ void DeviceQueueOp::PrintEndInfoWhenFirstBatch(bool *first_push_flag) {
}
Status DeviceQueueOp::RetryPushData(const std::vector<DataQueueItem> &items, const bool profiling,
uint64_t *push_time) {
#ifdef WITH_BACKEND
bool flag_log = false;
#ifndef ENABLE_SECURITY
uint64_t start_time = 0;
@ -801,10 +832,10 @@ Status DeviceQueueOp::RetryPushData(const std::vector<DataQueueItem> &items, con
start_time = ProfilingTime::GetCurMilliSecond();
}
#endif
while (!mindspore::DataQueueHandler::IsClosed() && !TaskManager::FindMe()->Interrupted()) {
BlockQueueStatus_T ret = mindspore::DataQueueHandler::Push(channel_name_, items, WAIT_TIME);
if (ret != BlockQueueStatus_T::SUCCESS) {
if (ret == BlockQueueStatus_T::ERROR_INPUT) {
while (!device::DataQueueMgr::GetInstance().IsClosed() && !TaskManager::FindMe()->Interrupted()) {
DataQueueStatus ret = device::DataQueueMgr::GetInstance().Push(channel_name_, items, WAIT_TIME);
if (ret != DataQueueStatus::SUCCESS) {
if (ret == DataQueueStatus::ERROR_INPUT) {
return Status(
StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Invalid data, the types or shapes of current row is different with previous row(i.e. do batch operation but "
@ -827,11 +858,13 @@ Status DeviceQueueOp::RetryPushData(const std::vector<DataQueueItem> &items, con
if (profiling) {
*push_time = ProfilingTime::GetCurMilliSecond() - start_time;
}
#endif
#endif
return Status::OK();
}
Status DeviceQueueOp::SendDataToAscendDynamic() {
#ifdef WITH_BACKEND
MS_LOG(DEBUG) << "Dynamic Device queue, sending data to Ascend.";
int64_t send_batch = 0;
@ -840,8 +873,8 @@ Status DeviceQueueOp::SendDataToAscendDynamic() {
bool is_break_loop = false;
std::function<void(void *, int32_t)> release_function([](void *, int32_t) { return; });
auto ret = mindspore::DataQueueHandler::OpenDynamicBufQueue(channel_name_, release_function);
if (ret != BlockQueueStatus_T::SUCCESS) {
auto ret = device::DataQueueMgr::GetInstance().OpenDynamicBufQueue(channel_name_, release_function);
if (ret != DataQueueStatus::SUCCESS) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] Failed to open channel for sending data.");
}
@ -854,16 +887,7 @@ Status DeviceQueueOp::SendDataToAscendDynamic() {
while (!curr_row.eoe() && !is_break_loop) {
RETURN_IF_NOT_OK(FilterMetadata(&curr_row));
RETURN_IF_NOT_OK(CheckExceptions(curr_row));
std::vector<device::DataQueueItem> items;
for (auto &i : curr_row) {
device::DataQueueItem data_item;
data_item.data_len_ = static_cast<size_t>(i->SizeInBytes());
data_item.shapes_ = i->shape().AsVector();
data_item.data_ptr_ = const_cast<void *>(static_cast<const void *>(i->GetBuffer()));
data_item.data_type_ = i->type().ToString();
items.push_back(data_item);
}
std::vector<device::DataQueueItem> items = ConvertTensorRowToDataQueueItem(curr_row);
RETURN_IF_NOT_OK(RetryPushData(items, false, &data_queue_cost));
if (create_data_info_queue_) {
DATA_INFO data_info;
@ -895,9 +919,9 @@ Status DeviceQueueOp::SendDataToAscendDynamic() {
tree_->SetFinished();
MS_LOG(INFO) << "ExecutionTree finished. Device queue sent number of batches: " << send_batch;
mindspore::DataQueueHandler::Close(channel_name_);
mindspore::DataQueueHandler::CloseConfirm();
device::DataQueueMgr::GetInstance().Close(channel_name_);
device::DataQueueMgr::GetInstance().CloseConfirm();
#endif
return Status::OK();
}
} // namespace dataset

View File

@ -27,30 +27,20 @@
#include "minddata/dataset/engine/perf/device_queue_tracing.h"
#include "minddata/dataset/util/status.h"
#include "mindspore/core/utils/data_queue_handler.h"
#ifdef ENABLE_DUMP_IR
#include "minddata/dataset/util/rdr.h"
#endif
#ifdef ENABLE_TDTQUE
#include "minddata/dataset/util/queue.h"
#include "minddata/dataset/engine/device_queue_impl/device_queue_base.h"
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_plugin.h"
#include "minddata/dataset/engine/device_queue_impl/host_queue/host_queue_plugin.h"
#endif
#ifdef ENABLE_GPUQUE
#include "minddata/dataset/engine/gpu_item_connector.h"
#include "minddata/dataset/util/circular_pool.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#endif
#include "mindspore/ccsrc/include/backend/data_queue/data_queue.h"
namespace mindspore {
namespace dataset {
class GpuConnector;
using DATA_INFO = std::vector<std::pair<DataType, TensorShape>>;
using DATA_INFO_QUEUE = Queue<DATA_INFO>;
using mindspore::device::BlockQueueStatus_T;
using mindspore::device::DataQueueItem;
using mindspore::device::DataQueueStatus;
constexpr int32_t kTimeOutMilliSeconds = 25000;
const int kDataInfoQueueCapacity = 128;
@ -83,13 +73,9 @@ class DeviceQueueOp : public PipelineOp {
stop_send_ = false;
}
#ifdef ENABLE_TDTQUE
void StopWaiting() { ascend_keep_waiting_ = false; }
#endif
#ifdef ENABLE_GPUQUE
Status ClearDevice();
#endif
Status GetDataInfo(DATA_INFO *data_info);
@ -136,7 +122,6 @@ class DeviceQueueOp : public PipelineOp {
bool NoExceptionRaised();
Status SendDataToAscendDynamic();
#ifdef ENABLE_TDTQUE
void WaitContinueSignal() const;
Status SendDataToAscend();
Status SendEpochEndToAscend(const TensorRow &curr_row, const bool &is_profiling_enable, int32_t *tdt_cost,
@ -144,11 +129,9 @@ class DeviceQueueOp : public PipelineOp {
void LimitSendingBatches(int64_t send_batch, int64_t *sending_num, std::shared_ptr<ConfigManager> cfg);
Status SendRowToTdt(TensorRow curr_row, bool is_profiling_enable, int32_t *tdt_cost);
// check the status of pushing data into the device
Status CheckPushStatus(Status status, bool stop_send, bool *send_finished, bool *is_break_loop);
Status CheckPushStatus(DataQueueStatus status, bool stop_send, bool *send_finished, bool *is_break_loop);
bool ascend_keep_waiting_;
#endif
#ifdef ENABLE_GPUQUE
Status SendDataToGPU();
Status MallocForGPUData(std::vector<device::DataQueueItem> *items, const TensorRow &curr_row,
const int32_t &worker_id);
@ -166,11 +149,6 @@ class DeviceQueueOp : public PipelineOp {
const uint32_t kDeviceQueGpuThreadMemory = 1024;
uint32_t num_workers_;
uint32_t queue_capacity_;
// This rank_id is for device_queue; one process works with only one rank_id.
// In the standalone scenario this rank_id may come from the env 'CUDA_VISIBLE_DEVICES',
// but in the distributed scenario it comes from _get_global_rank() in python
uint32_t rank_id_;
#endif
Status SendDataToCPU();
#ifndef ENABLE_SECURITY
@ -195,10 +173,8 @@ class DeviceQueueOp : public PipelineOp {
std::mutex data_info_mutex_;
bool first_push_flag_;  // default: false; set to true on the first push
bool dynamic_shape_{false};
std::shared_ptr<device::DataQueue> ascend_data_queue_;
#ifdef ENABLE_TDTQUE
std::shared_ptr<DeviceQueueBase> tdtInstancePtr;
#endif
#ifdef ENABLE_DUMP_IR
std::shared_ptr<MDChannelInfo> md_channel_info_;
#endif

View File

@ -1,11 +0,0 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
if(ENABLE_TDTQUE)
add_subdirectory(tdt)
add_subdirectory(host_queue)
endif()
add_library(engine-device-queue OBJECT device_queue_base.cc)
if(ENABLE_TDTQUE)
add_dependencies(engine-device-queue engine-tdt)
add_dependencies(engine-device-queue engine-host-queue)
endif()

View File

@ -1,95 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/device_queue_impl/device_queue_base.h"
#include "utils/ms_utils.h"
#ifndef ENABLE_SECURITY
#include "minddata/dataset/engine/perf/profiling.h"
#endif
#include "minddata/dataset/util/log_adapter.h"
#if ENABLE_D
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#endif
namespace mindspore {
namespace dataset {
constexpr auto kUnknownErrorString = "Unknown error occurred";
extern const bool kIsHeterogeneous = []() noexcept -> bool {
int32_t is_heterogeneous = 0;
(void)rtGetIsHeterogenous(&is_heterogeneous);
return is_heterogeneous == 1;
}();
DeviceQueueBase::DeviceQueueBase(const std::string &channel_name, int32_t device_id) {
// init ErrorManager, 0 means success
if (ErrorManager::GetInstance().Init() != 0) {
MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed.";
}
channel_name_ = channel_name;
device_id_ = device_id;
}
Status DeviceQueueBase::GetAclDataType(DataType d_type, aclDataType *datatype) {
switch (d_type.value()) {
case DataType::DE_BOOL:
*datatype = ACL_BOOL;
break;
case DataType::DE_INT8:
*datatype = ACL_INT8;
break;
case DataType::DE_UINT8:
*datatype = ACL_UINT8;
break;
case DataType::DE_INT16:
*datatype = ACL_INT16;
break;
case DataType::DE_UINT16:
*datatype = ACL_UINT16;
break;
case DataType::DE_INT32:
*datatype = ACL_INT32;
break;
case DataType::DE_UINT32:
*datatype = ACL_UINT32;
break;
case DataType::DE_FLOAT16:
*datatype = ACL_FLOAT16;
break;
case DataType::DE_FLOAT32:
*datatype = ACL_FLOAT;
break;
case DataType::DE_FLOAT64:
*datatype = ACL_DOUBLE;
break;
case DataType::DE_INT64:
*datatype = ACL_INT64;
break;
case DataType::DE_UINT64:
*datatype = ACL_UINT64;
break;
default:
RETURN_STATUS_UNEXPECTED("Invalid data, got unexpected data type.");
}
return Status::OK();
}
void DeviceQueueBase::ReportErrorMessage() {
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
}
}
} // namespace dataset
} // namespace mindspore

View File

@ -1,65 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_DEVICE_QUEUE_IMPL_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_DEVICE_QUEUE_IMPL_H_
#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <thread>
#include "acl/acl_tdt.h"
#include "runtime/rt_mem_queue.h"
#include "runtime/dev.h"
#include "runtime/config.h"
#include "graph/def_types.h"
#include "common/util/error_manager/error_manager.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_row.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
extern const bool kIsHeterogeneous;
class DeviceQueueBase {
public:
virtual Status hostPush(TensorRow ts_row, bool profiling, int32_t *time,
acltdtTensorType tdt_type = ACL_TENSOR_DATA_TENSOR) = 0;
DeviceQueueBase(const std::string &channel_name, int32_t device_id);
virtual ~DeviceQueueBase() {}
acltdtChannelHandle *acl_handle_;
protected:
Status GetAclDataType(DataType d_type, aclDataType *datatype);
void ReportErrorMessage();
std::string channel_name_;
int32_t device_id_ = 0;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_DEVICE_QUEUE_IMPL_H_

View File

@ -1,3 +0,0 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-host-queue OBJECT host_queue_plugin.cc)

View File

@ -1,300 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/device_queue_impl/host_queue/host_queue_plugin.h"
#include "utils/ms_utils.h"
#ifndef ENABLE_SECURITY
#include "minddata/dataset/engine/perf/profiling.h"
#endif
#include "minddata/dataset/util/log_adapter.h"
namespace mindspore {
namespace dataset {
constexpr auto kMbufHeadEndOfSequencePos = 128U;
constexpr auto kEndOfSequenceFlag = 0x5A;
constexpr auto kTransIdOffset = 64UL;
constexpr auto kSleepMilliSeconds = 500;
const std::map<acltdtTensorType, int32_t> type_map = {
{ACL_TENSOR_DATA_TENSOR, 0}, {ACL_TENSOR_DATA_END_OF_SEQUENCE, 1}, {ACL_TENSOR_DATA_ABNORMAL, 2}};
HostQueueImpl::HostQueueImpl(const std::string &channel_name, int32_t device_id)
: DeviceQueueBase(channel_name, device_id) {
auto status = HostQueueInit();
if (status != Status::OK()) {
MS_LOG(WARNING) << "Host queue init failed.";
}
}
Status HostQueueImpl::hostPush(TensorRow ts_row, bool profiling, int32_t *time, acltdtTensorType tdt_type) {
#ifndef ENABLE_SECURITY
double start_time;
if (profiling) {
start_time = ProfilingTime::GetCurMilliSecond();
}
#endif
// send data by datagw
auto status = SendDataByHostQueue(ts_row, tdt_type);
if (status != Status::OK()) {
RETURN_STATUS_UNEXPECTED("Host queue send data failed.");
}
#ifndef ENABLE_SECURITY
if (profiling) {
double end_time = ProfilingTime::GetCurMilliSecond();
*time = static_cast<int32_t>(end_time - start_time);
}
#endif
return status;
}
Status HostQueueImpl::HostQueueInit() {
auto ret = rtSetDevice(device_id_);
if (ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtSetDevice failed, ret =" << ret;
RETURN_STATUS_UNEXPECTED("Init:rtSetDevice failed.");
}
ret = rtMemQueueInit(device_id_);
if (ret != ACL_RT_SUCCESS && ret != ACL_ERROR_RT_REPEATED_INIT) {
MS_LOG(ERROR) << "call rtMemQueueInit failed, ret =" << ret;
RETURN_STATUS_UNEXPECTED("Init:rtMemQueueInit failed.");
}
rtMemQueueAttr_t attr = {};
auto mem_ret = memset_s(attr.name, RT_MQ_MAX_NAME_LEN, 0, RT_MQ_MAX_NAME_LEN);
if (mem_ret != EOK) {
MS_LOG(ERROR) << "call memset_s failed, ret =" << mem_ret;
RETURN_STATUS_UNEXPECTED("Init:memset_s failed.");
}
mem_ret = memcpy_s(attr.name, RT_MQ_MAX_NAME_LEN, channel_name_.c_str(), channel_name_.size() + 1);
if (mem_ret != EOK) {
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
RETURN_STATUS_UNEXPECTED("Init:memcpy_s failed.");
}
attr.depth = 128U;
attr.workMode = RT_MQ_MODE_DEFAULT;
attr.flowCtrlFlag = false;
attr.flowCtrlDropTime = 0U;
attr.overWriteFlag = false;
ret = rtMemQueueCreate(device_id_, &attr, &queue_id_);
if (ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMemQueueCreate failed, ret =" << ret;
RETURN_STATUS_UNEXPECTED("Init:rtMemQueueCreate failed.");
}
rtMemBuffCfg_t buff_cfg = {0};
ret = rtMbufInit(&buff_cfg);
if (ret != ACL_RT_SUCCESS && ret != ACL_ERROR_RT_REPEATED_INIT) {
MS_LOG(ERROR) << "call rtMbufInit failed, ret =" << ret;
RETURN_STATUS_UNEXPECTED("Init:rtMbufInit failed.");
}
const std::lock_guard<std::mutex> lk(queue_id_to_trans_id_map_mutex);
(void)queue_id_to_trans_id_map.insert({queue_id_, 0UL});
return Status::OK();
}
Status HostQueueImpl::AddDataItemInfo(const acltdtTensorType &tdt_data_type, const int32_t &tensor_type,
const int64_t *dims, const size_t &dim_size, void *data_ptr,
const uint64_t &data_len, std::vector<DataItemInfo> *items) {
DataItemInfo item = {};
int32_t data_type = 0;
auto it = type_map.find(tdt_data_type);
if (it != type_map.end()) {
data_type = it->second;
}
item.ctrl_info.data_type = data_type;
item.ctrl_info.tensor_type = tensor_type;
item.ctrl_info.dim_num = dim_size;
item.ctrl_info.data_len = data_len;
item.dims.clear();
for (size_t i = 0; i < dim_size; ++i) {
item.dims.push_back(dims[i]);
}
item.data_ptr = data_ptr;
items->push_back(item);
return Status::OK();
}
Status HostQueueImpl::CreateDataItemInfos(const acltdtTensorType &acl_type, const TensorRow &ts_row,
std::vector<DataItemInfo> *items) {
if (acl_type != ACL_TENSOR_DATA_TENSOR) {
return AddDataItemInfo(acl_type, ACL_BOOL, nullptr, 0UL, nullptr, 0UL, items);
}
for (auto &ts : ts_row) {
aclDataType data_type;
RETURN_IF_NOT_OK(GetAclDataType(ts->type(), &data_type));
TensorShape tsShape = ts->shape();
std::shared_ptr<void> dataPtr =
std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](const void *elem) {});
size_t dataLen = ts->SizeInBytes();
const dsize_t dims = tsShape.Rank();
std::vector<int64_t> dataShape;
for (auto i = 0; i < dims; i++) {
dataShape.emplace_back(tsShape[i]);
}
if (ts->type() != DataType::DE_STRING) {
RETURN_IF_NOT_OK(AddDataItemInfo(ACL_TENSOR_DATA_TENSOR, data_type, (tsShape.empty() ? nullptr : &dataShape[0]),
dims, dataPtr.get(), dataLen, items));
} else {
RETURN_STATUS_UNEXPECTED("Create data item failed when send data with type:" + std::to_string(data_type));
}
}
return Status::OK();
}
Status HostQueueImpl::SerializeDataItemInfos(std::vector<DataItemInfo> *items, void **buff,
const acltdtTensorType &acl_type) {
size_t cnt = items->size();
size_t total_size = 0UL;
for (size_t i = 0UL; i < cnt; ++i) {
(*items)[i].ctrl_info.cur_cnt = i;
(*items)[i].ctrl_info.cnt = cnt;
total_size += sizeof(ItemInfo) + (*items)[i].ctrl_info.dim_num * sizeof(int64_t) + (*items)[i].ctrl_info.data_len;
}
auto rt_error = rtMbufAlloc(buff, total_size);
if (rt_error != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMbufAlloc with size[" << total_size << "] failed, ret = " << rt_error;
RETURN_STATUS_UNEXPECTED("SerializeDataItemInfo: rtMbufAlloc failed.");
}
void *data = nullptr;
rt_error = rtMbufGetBuffAddr(*buff, &data);
if (rt_error != ACL_RT_SUCCESS) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call rtMbufGetBuffAddr with size[" << total_size << "] failed, ret = " << rt_error;
RETURN_STATUS_UNEXPECTED("SerializeDataItemInfo:rtMbufGetBuffAddr failed.");
}
void *head_buf = nullptr;
uint64_t head_size = 0UL;
rt_error = rtMbufGetPrivInfo(*buff, &head_buf, &head_size);
if (rt_error != ACL_RT_SUCCESS) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call rtMbufGetPrivInfo failed, ret =" << rt_error;
RETURN_STATUS_UNEXPECTED("SerializeDataItemInfo:rtMbufGetPrivInfo failed.");
}
if ((head_buf != nullptr) && (head_size > kMbufHeadEndOfSequencePos)) {
MS_LOG(DEBUG) << "host queue set end_of_sequence mbuf head.";
}
size_t offset = 0UL;
for (size_t i = 0UL; i < cnt; ++i) {
auto mem_ret = memcpy_s(ge::ValueToPtr(ge::PtrToValue(data) + offset), sizeof(ItemInfo), &((*items)[i].ctrl_info),
sizeof(ItemInfo));
if (mem_ret != EOK) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
RETURN_STATUS_UNEXPECTED("SerializeDataItemInfo:memcpy_s failed.");
}
offset += sizeof(ItemInfo);
for (size_t j = 0UL; j < (*items)[i].ctrl_info.dim_num; ++j) {
mem_ret = memcpy_s(ge::ValueToPtr(ge::PtrToValue(data) + offset), sizeof(int64_t), &((*items)[i].dims[j]),
sizeof(int64_t));
if (mem_ret != EOK) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
RETURN_STATUS_UNEXPECTED("SerializeDataItemInfo:memcpy_s failed.");
}
offset += sizeof(int64_t);
}
if ((*items)[i].ctrl_info.data_len == 0UL) {
continue;
}
mem_ret = memcpy_s(ge::ValueToPtr(ge::PtrToValue(data) + offset), (*items)[i].ctrl_info.data_len,
(*items)[i].data_ptr, (*items)[i].ctrl_info.data_len);
if (mem_ret != EOK) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
RETURN_STATUS_UNEXPECTED("SerializeDataItemInfo:memcpy_s failed.");
}
offset += (*items)[i].ctrl_info.data_len;
}
return Status::OK();
}
Status HostQueueImpl::LaunchTensor2MBuff(const acltdtTensorType &acl_type, const TensorRow &tensor_row, void **buff) {
std::vector<DataItemInfo> items;
RETURN_IF_NOT_OK(CreateDataItemInfos(acl_type, tensor_row, &items));
RETURN_IF_NOT_OK(SerializeDataItemInfos(&items, buff, acl_type));
RETURN_IF_NOT_OK(SetTransId4MBuf(buff));
return Status::OK();
}
Status HostQueueImpl::SetTransId4MBuf(void **buff) {
void *head_buff = nullptr;
uint64_t head_size = 0UL;
auto ret = rtMbufGetPrivInfo(*buff, &head_buff, &head_size);
if (ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMbufGetPrivInfo failed, ret =" << ret;
RETURN_STATUS_UNEXPECTED("HostQueueSetTransId:rtMbufGetPrivInfo failed.");
}
uint64_t *trans_id = reinterpret_cast<uint64_t *>(static_cast<uint8_t *>(head_buff) + head_size - kTransIdOffset);
const std::lock_guard<std::mutex> lk(queue_id_to_trans_id_map_mutex);
*trans_id = ++queue_id_to_trans_id_map[queue_id_];
MS_LOG(DEBUG) << "host queue[" << queue_id_ << "] set trans id[" << *trans_id << "] success";
return Status::OK();
}
Status HostQueueImpl::EnqueueData(void *buff, bool *need_resend) {
*need_resend = false;
auto rt_error = rtSetDevice(device_id_);
if (rt_error != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtSetDevice device failed, ret=" << rt_error;
RETURN_STATUS_UNEXPECTED("rtSetDevice failed.");
}
rt_error = rtMemQueueEnQueue(device_id_, queue_id_, buff);
if (rt_error == RT_ERROR_NONE) {
return Status::OK();
} else if (rt_error == ACL_ERROR_RT_QUEUE_FULL) {
*need_resend = true;
MS_LOG(DEBUG) << "queue[" << queue_id_ << "] is full, need call rtMemQueueEnQueue again";
} else {
HostQueueFreeBuff(buff);
MS_LOG(ERROR) << "host enqueue queue[" << queue_id_ << "] failed, ret = " << rt_error;
RETURN_STATUS_UNEXPECTED("host enqueue queue failed.");
}
return Status::OK();
}
Status HostQueueImpl::SendDataByHostQueue(const TensorRow &tensor_row, const acltdtTensorType &data_type) {
Status status;
bool is_need_resend = false;
void *buff = nullptr;
// Status status;
RETURN_IF_NOT_OK(LaunchTensor2MBuff(data_type, tensor_row, &buff));
do {
if (is_need_resend) {
std::this_thread::sleep_for(std::chrono::milliseconds(kSleepMilliSeconds));
}
status = EnqueueData(buff, &is_need_resend);
} while (status == Status::OK() && is_need_resend);
return Status::OK();
}
void HostQueueImpl::HostQueueFreeBuff(void *buff) {
auto rt_error = rtMbufFree(buff);
if (rt_error != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMbufFree failed, ret=" << rt_error;
}
}
} // namespace dataset
} // namespace mindspore

View File

@ -1,91 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_HOST_QUEUE_HOST_QUEUE_PLUGIN_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_HOST_QUEUE_HOST_QUEUE_PLUGIN_H_
#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <thread>
#include "acl/acl_tdt.h"
#include "runtime/rt_mem_queue.h"
#include "runtime/dev.h"
#include "runtime/config.h"
#include "graph/def_types.h"
#include "common/util/error_manager/error_manager.h"
#include "minddata/dataset/engine/device_queue_impl/device_queue_base.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_row.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
struct ItemInfo {
int32_t version;
int32_t data_type;
uint32_t cur_cnt;
uint32_t cnt;
int32_t tensor_type;
uint32_t dim_num;
char reserved[32];
uint64_t data_len;
};
struct DataItemInfo {
ItemInfo ctrl_info;
std::vector<int64_t> dims;
void *data_ptr;
};
class HostQueueImpl : public DeviceQueueBase {
public:
Status hostPush(TensorRow ts_row, bool profiling, int32_t *time, acltdtTensorType tdt_type = ACL_TENSOR_DATA_TENSOR);
HostQueueImpl(const std::string &channel_name, int32_t device_id);
~HostQueueImpl() {}
private:
Status HostQueueInit();
Status SendDataByHostQueue(const TensorRow &tensor_row, const acltdtTensorType &data_type);
Status SetTransId4MBuf(void **buff);
Status LaunchTensor2MBuff(const acltdtTensorType &acl_type, const TensorRow &tensor_row, void **buff);
Status EnqueueData(void *buff, bool *need_resend);
Status CreateDataItemInfos(const acltdtTensorType &acl_type, const TensorRow &ts_row,
std::vector<DataItemInfo> *items);
Status SerializeDataItemInfos(std::vector<DataItemInfo> *items, void **buff, const acltdtTensorType &acl_type);
Status AddDataItemInfo(const acltdtTensorType &tdt_data_type, const int32_t &tensor_type, const int64_t *dims,
const size_t &dim_size, void *data_ptr, const uint64_t &data_len,
std::vector<DataItemInfo> *items);
void HostQueueFreeBuff(void *buff);
std::mutex queue_id_to_trans_id_map_mutex;
std::map<uint32_t, uint64_t> queue_id_to_trans_id_map;
uint32_t queue_id_ = 0;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_HOST_QUEUE_HOST_QUEUE_PLUGIN_H_

View File

@ -1,3 +0,0 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-tdt OBJECT tdt_plugin.cc tdt_handle.cc)

View File

@ -1,20 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_handle.h"
// Implementation code for TdtHandle moved from 'tdt_handle.cc' to 'tdt_handle.h';
// This is a workaround for cyclic dependency problem between dataset and core
// when ENABLE_TDTQUE is enabled.

View File

@ -1,84 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_TDT_TDT_HANDLE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_TDT_TDT_HANDLE_H_
#include <iostream>
#include <map>
#include <thread>
#include <utility>
#include "minddata/dataset/util/log_adapter.h"
#include "acl/acl_tdt.h"
namespace mindspore {
namespace dataset {
class TdtHandle {
public:
static inline void AddHandle(acltdtChannelHandle **handle, std::thread *use_thread);
static inline bool DestroyHandle();
static inline void DelHandle(acltdtChannelHandle **handle);
private:
TdtHandle() {}
~TdtHandle() = default;
};
inline void TdtHandle::AddHandle(acltdtChannelHandle **handle, std::thread *use_thread) {
if (*handle != nullptr) {
auto ret = acl_handle_map.emplace(reinterpret_cast<void **>(handle), use_thread);
if (!std::get<1>(ret)) {
MS_LOG(ERROR) << "Failed to add new handle to acl_handle_map." << std::endl;
}
}
}
inline void TdtHandle::DelHandle(acltdtChannelHandle **handle) {
void **void_handle = reinterpret_cast<void **>(handle);
acl_handle_map.erase(void_handle);
}
inline bool TdtHandle::DestroyHandle() {
bool destroy_all = true;
for (auto &item : acl_handle_map) {
acltdtChannelHandle **handle = reinterpret_cast<acltdtChannelHandle **>(item.first);
if (*handle != nullptr) {
aclError stop_status = acltdtStopChannel(*handle);
if (stop_status != ACL_SUCCESS) {
MS_LOG(ERROR) << "Failed stop acl data channel and the stop status is " << stop_status << std::endl;
return false;
}
if (item.second != nullptr && item.second->joinable()) {
item.second->join();
}
if (acltdtDestroyChannel(*handle) != ACL_SUCCESS) {
destroy_all = false;
} else {
*handle = nullptr;
}
}
}
// clear the map container when all the handles have been destroyed
if (destroy_all) {
acl_handle_map.clear();
}
return destroy_all;
}
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_TDT_TDT_HANDLE_H_

View File

@ -1,205 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_plugin.h"
#include "utils/ms_utils.h"
#ifndef ENABLE_SECURITY
#include "minddata/dataset/engine/perf/profiling.h"
#endif
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/task_manager.h"
#if ENABLE_D
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#endif
namespace mindspore {
namespace dataset {
TdtPlugin::TdtPlugin(const std::string &channel_name, int32_t device_id) : DeviceQueueBase(channel_name, device_id) {
// init ErrorManager, 0 means success
if (ErrorManager::GetInstance().Init() != 0) {
MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed.";
}
// create acl tdt handle
acl_handle_ = acltdtCreateChannel(device_id, channel_name.c_str());
if (acl_handle_ == nullptr) {
MS_LOG(ERROR) << "Create channel for sending data failed, please check DEVICE ID setting, DEVICE ID that passed "
"into dataset(from context) and training process should be the same.";
ReportErrorMessage();
}
TdtHandle::AddHandle(&acl_handle_, nullptr);
}
TdtPlugin::~TdtPlugin() {
if (acl_handle_ != nullptr) {
if (acltdtDestroyChannel(acl_handle_) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Failed to destroy channel for tdt queue.";
ReportErrorMessage();
} else {
TdtHandle::DelHandle(&acl_handle_);
acl_handle_ = nullptr;
}
}
}
Status TdtPlugin::hostPush(TensorRow ts_row, bool profiling, int32_t *time, acltdtTensorType tdt_type) {
MS_LOG(DEBUG) << "TDT channel name is " << channel_name_ << ".";
acltdtDataset *acl_dataset = nullptr;
#ifndef ENABLE_SECURITY
double start_time;
#endif
auto ret = translate(tdt_type, ts_row, &acl_dataset);
if (ret != Status::OK()) {
DestroyAclDataset(acl_dataset);
RETURN_STATUS_UNEXPECTED("Converting into TDT tensor failed!");
}
#ifndef ENABLE_SECURITY
if (profiling) {
start_time = ProfilingTime::GetCurMilliSecond();
}
#endif
#if ENABLE_D
// Data prefetch only when PS mode enables cache.
if (acltdtGetDatasetSize(acl_dataset) > 0) {
acltdtDataItem *item0 = acltdtGetDataItem(acl_dataset, 0);
std::string item_type;
RETURN_IF_NOT_OK(ParseType(acltdtGetDataTypeFromItem(item0), &item_type));
if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, acltdtGetDataAddrFromItem(item0),
acltdtGetDataSizeFromItem(item0), item_type)) {
RETURN_STATUS_UNEXPECTED("PrefetchData failed in when pre-processing sending data.");
}
}
#endif
auto status = acltdtSendTensor(acl_handle_, acl_dataset, -1);
DestroyAclDataset(acl_dataset);
if (status != ACL_SUCCESS) {
// if the device_queue thread had been interrupted by master, just print warning and return success
if (mindspore::dataset::this_thread::is_interrupted()) {
MS_LOG(WARNING) << "Device queue thread had been interrupted by TdtHandle::DestroyHandle, you can ignore "
<< "the above error: 'failed to send...'. In this scenario, the training ends first without "
<< "using all epoch(s) data, and the data preprocessing is blocked by the data "
<< "transmission channel on the device side. So we force the data transmission channel to stop.";
return Status::OK();
}
ReportErrorMessage();
RETURN_STATUS_UNEXPECTED("Tdt Send data failed.");
}
#ifndef ENABLE_SECURITY
if (profiling) {
double end_time = ProfilingTime::GetCurMilliSecond();
*time = static_cast<int32_t>(end_time - start_time);
}
#endif
return Status::OK();
}
Status TdtPlugin::ParseType(const aclDataType &acl_data_type, std::string *data_type) {
auto type_iter = parse_map.find(acl_data_type);
if (type_iter == parse_map.end()) {
RETURN_STATUS_UNEXPECTED("Got unsupported acl datatype: " + std::to_string(acl_data_type));
}
*data_type = type_iter->second;
return Status::OK();
}
Status TdtPlugin::translate(acltdtTensorType tdt_type, const TensorRow &ts_row, acltdtDataset **output_acl_dataset) {
auto acl_dataset = acltdtCreateDataset();
if (acl_dataset == nullptr) {
RETURN_STATUS_UNEXPECTED("Create tdt dataset failed.");
}
auto status = AssembleTensor2AclDataset(tdt_type, ts_row, acl_dataset);
if (status != Status::OK()) {
DestroyAclDataset(acl_dataset);
RETURN_STATUS_UNEXPECTED("Assemble tensor row to tdt dataset failed.");
}
*output_acl_dataset = acl_dataset;
return Status::OK();
}
Status TdtPlugin::AssembleTensor2AclDataset(acltdtTensorType tdt_type, const TensorRow &ts_row,
acltdtDataset *acl_dataset) {
if (tdt_type != ACL_TENSOR_DATA_TENSOR || ts_row.size() == 0) {
acltdtDataItem *acl_data = acltdtCreateDataItem(tdt_type, nullptr, 0, ACL_BOOL, nullptr, 0);
if (acl_data == nullptr) {
RETURN_STATUS_UNEXPECTED("Create data item failed when send data with type:" + std::to_string(tdt_type));
}
if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_SUCCESS) {
if (acltdtDestroyDataItem(acl_data) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Destroy data item failed when send data with type: " << tdt_type;
}
RETURN_STATUS_UNEXPECTED("Add data item to tdt dataset failed when send data.");
}
return Status::OK();
}
for (auto ts : ts_row) {
aclDataType datatype;
acltdtDataItem *acl_data = nullptr;
RETURN_IF_NOT_OK(GetAclDataType(ts->type(), &datatype));
TensorShape tsShape = ts->shape();
std::string dataShapes = "[";
for (auto dim : tsShape.AsVector()) {
(void)dataShapes.append(std::to_string(dim)).append(",");
}
dataShapes.pop_back();
(void)dataShapes.append("]");
std::shared_ptr<void> dataPtr =
std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](const void *elem) {});
size_t dataLen = ts->SizeInBytes();
const dsize_t dims = tsShape.Rank();
std::vector<int64_t> dataShape;
for (auto i = 0; i < dims; i++) {
dataShape.emplace_back(tsShape[i]);
}
acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, (tsShape.empty() ? nullptr : &dataShape[0]), dims, datatype,
dataPtr.get(), dataLen);
if (acl_data == nullptr) {
RETURN_STATUS_UNEXPECTED("Create data item failed when send data.");
}
if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_SUCCESS) {
if (acltdtDestroyDataItem(acl_data) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Destroy data item failed when send data with type ACL_TENSOR_DATA_TENSOR.";
}
RETURN_STATUS_UNEXPECTED("Add data item to tdt dataset failed when send data.");
}
MS_LOG(DEBUG) << "TDT data type is TDT_TENSOR, tensor type is " << datatype << ", tensor shape is " << dataShapes
<< ", data length is " << ts->Size() << ".";
}
return Status::OK();
}
Status TdtPlugin::DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item) {
if (include_data_item) {
for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) {
if (acltdtDestroyDataItem(acltdtGetDataItem(acl_dataset, i)) != ACL_SUCCESS) {
ReportErrorMessage();
RETURN_STATUS_UNEXPECTED("Destroy data item failed when send data.");
}
}
}
if (acltdtDestroyDataset(acl_dataset) != ACL_SUCCESS) {
ReportErrorMessage();
RETURN_STATUS_UNEXPECTED("Destroy tdt dataset failed when send data.");
}
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

View File

@ -1,71 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_TDT_TDT_PLUGIN_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_TDT_TDT_PLUGIN_H_
#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <thread>
#include "acl/acl_tdt.h"
#include "runtime/rt_mem_queue.h"
#include "runtime/dev.h"
#include "runtime/config.h"
#include "graph/def_types.h"
#include "common/util/error_manager/error_manager.h"
#include "minddata/dataset/engine/device_queue_impl/device_queue_base.h"
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_handle.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_row.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
class TdtPlugin : public DeviceQueueBase {
public:
static std::shared_ptr<TdtPlugin> GetInstance();
Status hostPush(TensorRow ts_row, bool profiling, int32_t *time, acltdtTensorType tdt_type = ACL_TENSOR_DATA_TENSOR);
TdtPlugin(const std::string &channel_name, int32_t device_id);
~TdtPlugin();
private:
Status DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item = true);
Status AssembleTensor2AclDataset(acltdtTensorType tdt_type, const TensorRow &ts_row, acltdtDataset *acl_dataset);
Status ParseType(const aclDataType &acl_data_type, std::string *data_type);
Status translate(acltdtTensorType tdt_type, const TensorRow &ts_row, acltdtDataset **output_acl_dataset);
void *tdt_handle_ = nullptr;
const std::map<aclDataType, std::string> parse_map = {
{ACL_INT8, "int8"}, {ACL_UINT8, "uint8"}, {ACL_INT16, "int16"}, {ACL_UINT16, "uint16"},
{ACL_INT32, "int32"}, {ACL_UINT32, "uint32"}, {ACL_INT64, "int64"}, {ACL_UINT64, "uint64"},
{ACL_FLOAT16, "float16"}, {ACL_FLOAT, "float32"}, {ACL_DOUBLE, "float64"}, {ACL_BOOL, "bool"}};
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DEVICE_QUEUE_IMPL_TDT_TDT_PLUGIN_H_

View File

@ -19,7 +19,7 @@
#include <limits>
#include "minddata/dataset/engine/datasetops/dataset_op.h"
#include "minddata/dataset/engine/datasetops/device_queue_op.h"
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
#ifdef WITH_BACKEND
#include "mindspore/core/utils/numa_interface.h"
#endif
#include "minddata/dataset/util/task_manager.h"
@ -28,7 +28,7 @@
namespace mindspore {
namespace dataset {
// Constructor
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
#ifdef WITH_BACKEND
ExecutionTree::ExecutionTree() : ExecutionTree(GlobalContext::config_manager()) {}
ExecutionTree::ExecutionTree(std::shared_ptr<ConfigManager> cfg)
@ -52,7 +52,7 @@ ExecutionTree::ExecutionTree() : id_count_(0), tree_state_(kDeTStateInit), prepa
// Destructor
ExecutionTree::~ExecutionTree() {
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
#ifdef WITH_BACKEND
if (numa_enable_) {
handle_ = nullptr;
}
@ -152,7 +152,7 @@ void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp
Status ExecutionTree::Launch() {
// opencv limit too many threads
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__) && !defined(ENABLE_ANDROID)
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
#ifdef WITH_BACKEND
// Here we do numa bind for performance optimization, as our test result,
// if we do numa bind when get_dataset_size launch a tree, we'll get a
// better performance than only we do numa bind at the time _To_Device

View File

@ -229,7 +229,7 @@ class ExecutionTree {
TreeState tree_state_; // Tracking the current tree state
std::string unique_id_; // A unique identifier for the tree
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
#ifdef WITH_BACKEND
// Constructor for WITH_BACKEND
explicit ExecutionTree(std::shared_ptr<ConfigManager> cfg);
// This rank_id is for numa and device_queue, one process work with only one rank_id,

View File

@ -16,7 +16,6 @@
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_GPU_ITEM_CONNECTOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_GPU_ITEM_CONNECTOR_H_
#ifdef ENABLE_GPUQUE
#include <memory>
#include <string>
#include <utility>
@ -24,7 +23,7 @@
#include "minddata/dataset/engine/connector.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "runtime/data_queue/blocking_queue.h"
#include "include/backend/data_queue/blocking_queue.h"
using mindspore::device::DataQueueItem;
@ -88,5 +87,4 @@ class GpuConnector : public Connector<GpuConnectorItem> {
};
} // namespace dataset
} // namespace mindspore
#endif // ENABLE_GPUQUE
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_GPU_ITEM_CONNECTOR_H_

View File

@ -20,17 +20,17 @@
#include <algorithm>
#include "utils/ms_utils.h"
#include "minddata/dataset/util/path.h"
#ifdef ENABLE_GPUQUE
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/global_context.h"
#endif
#include "minddata/dataset/engine/perf/monitor.h"
#include "minddata/dataset/engine/perf/connector_size.h"
#include "minddata/dataset/engine/perf/cpu_sampler.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/engine/tree_adapter.h"
#include "minddata/dataset/util/log_adapter.h"
#ifdef WITH_BACKEND
#include "utils/ms_context.h"
#endif
namespace mindspore {
namespace dataset {
@ -824,18 +824,19 @@ ProfilingManager::ProfilingRegistrationState ProfilingManager::GetProfilerTreeSt
}
std::string ProfilingManager::GetRankID() const {
std::string rank_id;
#ifdef ENABLE_GPUQUE
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
int32_t rank_id_int = cfg->rank_id();
// If DEVICE_ID is not set, default value is 0
if (rank_id_int < 0) {
rank_id = common::GetEnv("DEVICE_ID");
} else {
rank_id = std::to_string(rank_id_int);
std::string rank_id = common::GetEnv("RANK_ID");
#ifdef WITH_BACKEND
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
if (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
int32_t rank_id_int = cfg->rank_id();
// If DEVICE_ID is not set, default value is 0
if (rank_id_int < 0) {
rank_id = common::GetEnv("DEVICE_ID");
} else {
rank_id = std::to_string(rank_id_int);
}
}
#else
rank_id = common::GetEnv("RANK_ID");
#endif
// If RANK_ID is not set, default value is 0
if (rank_id.empty()) {

View File

@ -18,7 +18,9 @@
#include <utility>
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/system_pool.h"
#ifdef WITH_BACKEND
#include "mindspore/ccsrc/include/backend/data_queue/data_queue_mgr.h"
#endif
namespace mindspore {
namespace dataset {
struct MemHdr {
@ -243,29 +245,22 @@ std::ostream &operator<<(std::ostream &os, const ArenaImpl &s) {
Status Arena::Init() {
try {
int64_t sz = size_in_MB_ * 1048576L;
#ifdef ENABLE_GPUQUE
#ifdef WITH_BACKEND
if (is_cuda_malloc_) {
auto ret = cudaHostAlloc(&ptr_, sz, cudaHostAllocDefault);
if (ret != cudaSuccess) {
MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
return Status(StatusCode::kMDOutOfMemory);
}
impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
ptr_ = device::DataQueueMgr::GetInstance().AllocHostMem("GPU", sz);
} else {
RETURN_IF_NOT_OK(DeMalloc(sz, &ptr_, false));
impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
ptr_ = device::DataQueueMgr::GetInstance().AllocHostMem("Others", sz);
}
#else
RETURN_IF_NOT_OK(DeMalloc(sz, &ptr_, false));
impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
ptr_ = std::shared_ptr<void>(::malloc(sz), ::free);
#endif
impl_ = std::make_unique<ArenaImpl>(ptr_.get(), sz);
} catch (std::bad_alloc &e) {
return Status(StatusCode::kMDOutOfMemory);
}
return Status::OK();
}
#ifdef ENABLE_GPUQUE
Arena::Arena(size_t val_in_MB, bool is_cuda_malloc)
: ptr_(nullptr), size_in_MB_(val_in_MB), is_cuda_malloc_(is_cuda_malloc) {}
@ -279,19 +274,5 @@ Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB, bool i
RETURN_IF_NOT_OK(ba->Init());
return Status::OK();
}
#else
Arena::Arena(size_t val_in_MB) : ptr_(nullptr), size_in_MB_(val_in_MB) {}
Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB) {
RETURN_UNEXPECTED_IF_NULL(p_ba);
auto ba = new (std::nothrow) Arena(val_in_MB);
if (ba == nullptr) {
return Status(StatusCode::kMDOutOfMemory);
}
(*p_ba).reset(ba);
RETURN_IF_NOT_OK(ba->Init());
return Status::OK();
}
#endif
} // namespace dataset
} // namespace mindspore
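
The Arena change above moves ownership into a std::shared_ptr<void> whose deleter knows how the buffer was obtained, which is why ~Arena() can be defaulted and the #ifdef'd cudaFreeHost/free branches disappear. A minimal sketch of the idiom, using cudaHostAlloc for the pinned case as the GPU path does and ::malloc/::free otherwise (MakeHostBuffer is an illustrative name):

#include <cuda_runtime_api.h>
#include <cstddef>
#include <cstdlib>
#include <memory>

std::shared_ptr<void> MakeHostBuffer(size_t size, bool pinned) {
  if (pinned) {
    void *ptr = nullptr;
    if (cudaHostAlloc(&ptr, size, cudaHostAllocDefault) != cudaSuccess) {
      return nullptr;  // treated as out-of-memory by the caller
    }
    // The deleter remembers that this buffer must go back through cudaFreeHost.
    return std::shared_ptr<void>(ptr, [](void *p) {
      if (p != nullptr) {
        (void)cudaFreeHost(p);
      }
    });
  }
  // Plain host memory: pair ::malloc with ::free, as Arena::Init now does.
  return std::shared_ptr<void>(::malloc(size), ::free);
}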

View File

@ -22,9 +22,6 @@
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/memory_pool.h"
#include "minddata/dataset/util/treap.h"
#ifdef ENABLE_GPUQUE
#include <cuda_runtime_api.h>
#endif
#define ARENA_LOG_BLK_SZ (6u)
#define ARENA_BLK_SZ (static_cast<uint16_t>(1u << ARENA_LOG_BLK_SZ))
@ -107,20 +104,7 @@ class Arena : public MemoryPool {
// Disable copy and assignment constructor
Arena(const Arena &) = delete;
Arena &operator=(const Arena &) = delete;
~Arena() override {
#ifdef ENABLE_GPUQUE
if (is_cuda_malloc_) {
if (ptr_ != nullptr) {
(void)cudaFreeHost(ptr_);
}
}
#else
if (ptr_ != nullptr) {
free(ptr_);
}
ptr_ = nullptr;
#endif
}
~Arena() override = default;
/// As a derived class of MemoryPool, we have to implement the following.
/// But we simply transfer the call to the implementation class
@ -150,28 +134,17 @@ class Arena : public MemoryPool {
os << *(s.impl_);
return os;
}
#ifdef ENABLE_GPUQUE
/// The only method to create an arena.
static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096, bool is_cuda_malloc = false);
#else
/// The only method to create an arena.
static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096);
#endif
protected:
mutable std::mutex mux_;
std::unique_ptr<ArenaImpl> impl_;
void *ptr_;
std::shared_ptr<void> ptr_;
size_t size_in_MB_;
#ifdef ENABLE_GPUQUE
bool is_cuda_malloc_;
explicit Arena(size_t val_in_MB = 4096, bool is_cuda_malloc = false);
#else
explicit Arena(size_t val_in_MB = 4096);
#endif
Status Init();
};

View File

@ -27,11 +27,7 @@ namespace dataset {
Status CircularPool::AddOneArena() {
Status rc;
std::shared_ptr<Arena> b;
#ifdef ENABLE_GPUQUE
RETURN_IF_NOT_OK(Arena::CreateArena(&b, arena_size_, is_cuda_malloc_));
#else
RETURN_IF_NOT_OK(Arena::CreateArena(&b, arena_size_));
#endif
tail_ = b.get();
cur_size_in_mb_ += arena_size_;
mem_segments_.push_back(std::move(b));
@ -198,22 +194,13 @@ int CircularPool::PercentFree() const {
}
}
#ifdef ENABLE_GPUQUE
CircularPool::CircularPool(int max_size_in_gb, int arena_size, bool is_cuda_malloc)
: unlimited_(max_size_in_gb <= 0),
max_size_in_mb_(unlimited_ ? std::numeric_limits<int32_t>::max() : max_size_in_gb * 1024),
arena_size_(arena_size),
is_cuda_malloc_(is_cuda_malloc),
cur_size_in_mb_(0) {}
#else
CircularPool::CircularPool(int max_size_in_gb, int arena_size)
: unlimited_(max_size_in_gb <= 0),
max_size_in_mb_(unlimited_ ? std::numeric_limits<int32_t>::max() : max_size_in_gb * 1024),
arena_size_(arena_size),
cur_size_in_mb_(0) {}
#endif
#ifdef ENABLE_GPUQUE
Status CircularPool::CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb, int arena_size,
bool createOneArena, bool is_cuda_malloc) {
Status rc;
@ -234,28 +221,6 @@ Status CircularPool::CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, i
}
return rc;
}
#else
Status CircularPool::CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb, int arena_size,
bool createOneArena) {
Status rc;
if (out_pool == nullptr) {
RETURN_STATUS_UNEXPECTED("pPool is null");
}
auto pool = new (std::nothrow) CircularPool(max_size_in_gb, arena_size);
if (pool == nullptr) {
RETURN_STATUS_OOM("Out of memory.");
}
if (createOneArena) {
rc = pool->AddOneArena();
}
if (rc.IsOk()) {
(*out_pool).reset(pool);
} else {
delete pool;
}
return rc;
}
#endif
CircularPool::~CircularPool() = default;
} // namespace dataset

View File

@ -85,13 +85,8 @@ class CircularPool : public MemoryPool {
return os;
}
#ifdef ENABLE_GPUQUE
static Status CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb = -1,
int arena_size = 4096, bool create_one_arena = false, bool is_cuda_malloc = false);
#else
static Status CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb = -1,
int arena_size = 4096, bool create_one_arena = false);
#endif
private:
ListOfArenas mem_segments_;
@ -101,16 +96,10 @@ class CircularPool : public MemoryPool {
int arena_size_;
int cur_size_in_mb_;
RWLock rw_lock_;
#ifdef ENABLE_GPU
bool is_cuda_malloc_;
// We can take negative or 0 as input which means unlimited.
CircularPool(int max_size_in_gb, int arena_size, bool is_cuda_malloc);
#else
// We can take negative or 0 as input which means unlimited.
CircularPool(int max_size_in_gb, int arena_size);
#endif
Status AddOneArena();
};

View File

@ -22,12 +22,10 @@
#if defined(__ANDROID__) || defined(ANDROID)
#include "minddata/dataset/util/services.h"
#endif
#ifdef ENABLE_TDTQUE
#include "acl/acl_tdt.h"
#include "tdt/status.h"
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_handle.h"
#ifdef WITH_BACKEND
#include "utils/ms_context.h"
#include "mindspore/ccsrc/include/backend/data_queue/data_queue_mgr.h"
#endif
namespace mindspore {
namespace dataset {
thread_local Task *gMyTask = nullptr;
@ -141,6 +139,10 @@ Status Task::Run() {
}
Status Task::Join(WaitFlag blocking) {
#ifdef WITH_BACKEND
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
std::string device_target = MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET);
#endif
if (running_) {
RETURN_UNEXPECTED_IF_NULL(MyTaskGroup());
auto interrupt_svc = MyTaskGroup()->GetIntrpService();
@ -162,23 +164,27 @@ Status Task::Join(WaitFlag blocking) {
MS_LOG(WARNING) << MyName() << " Thread ID " << ss.str() << " is not responding. Interrupt again";
interrupt_svc->InterruptAll();
wait_times++;
#ifdef ENABLE_TDTQUE
// Because hostPush hung in DeviceQueueOp, wait 5 seconds and destroy the tdt
if (wait_times > 5 && my_name_.find("DeviceQueueOp") != std::string::npos) {
MS_LOG(WARNING) << "Wait " << wait_times << " seconds, "
<< "the task: " << my_name_ << " will be destroyed by TdtHostDestory.";
if (!TdtHandle::DestroyHandle()) {
MS_LOG(WARNING) << "Destroy tdt channel failed.";
} else {
MS_LOG(INFO) << "Destroy tdt channel success.";
}
#ifdef WITH_BACKEND
if (device_target == kAscendDevice) {
// Because hostPush hung in DeviceQueueOp, wait 5 seconds and destroy the tdt
if (wait_times > 5 && my_name_.find("DeviceQueueOp") != std::string::npos) {
MS_LOG(WARNING) << "Wait " << wait_times << " seconds, "
<< "the task: " << my_name_ << " will be destroyed by TdtHostDestory.";
auto queue =
device::DataQueueMgr::GetInstance().CreateDataQueue(kAscendDevice, {}, false, 0, nullptr, {});
if (queue != nullptr && !queue->Destroy()) {
MS_LOG(WARNING) << "Destroy tdt channel failed.";
} else {
MS_LOG(INFO) << "Destroy tdt channel success.";
}
// just wait 30 seconds
// case1: cpu usage 100%, DeviceQueueOp thread may destroy without thrd_ future
if (wait_times > kWaitInterruptTaskTime) {
MS_LOG(WARNING) << MyName() << " Thread ID " << ss.str()
<< " is not responding. Maybe it's destroyed, task stop.";
break;
// just wait 30 seconds
// case1: cpu usage 100%, DeviceQueueOp thread may destroy without thrd_ future
if (wait_times > kWaitInterruptTaskTime) {
MS_LOG(WARNING) << MyName() << " Thread ID " << ss.str()
<< " is not responding. Maybe it's destroyed, task stop.";
break;
}
}
}
#endif

View File

@ -1109,7 +1109,7 @@ bool TaskEmitAction(const ResourcePtr &resource) {
resource->SetResult(kOutput, graph_id);
return true;
}
std::vector<PrimitivePtr> cut_list = compile::nonlinear_ops;
std::vector<PrimitivePtr> cut_list = compile::GetNonlinearOps();
if (bc_ptr->name() == kMsConvert) {
cut_list = compile::GetMsNonlinearOps();
}

View File

@ -7,11 +7,6 @@ file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
list(REMOVE_ITEM DEVICE_SRC_LIST "distribute/mpi_collective_group.cc"
"distribute/collective_group_wrapper.cc" "distribute/mpi_pycc.cc")
if(ENABLE_TDTQUE)
file(GLOB_RECURSE TDT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/engine/tdt/tdt_handle.cc)
endif()
if(ENABLE_MPI AND ENABLE_D)
set_property(SOURCE "distribute/mpi_pycc.cc"
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)

View File

@ -16,16 +16,113 @@
#include "plugin/device/ascend/hal/device/ascend_data_queue.h"
#include <string>
#include "plugin/device/ascend/hal/device/ascend_kernel_runtime.h"
#include <map>
#include <utility>
#include "graph/def_types.h"
#include "common/util/error_manager/error_manager.h"
#include "include/backend/data_queue/data_queue_mgr.h"
#include "utils/log_adapter.h"
#include "plugin/device/ascend/hal/device/ascend_kernel_runtime.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
namespace mindspore {
namespace device {
namespace {
const std::map<aclDataType, std::string> kAclTypeToString = {
{ACL_INT8, "int8"}, {ACL_UINT8, "uint8"}, {ACL_INT16, "int16"}, {ACL_UINT16, "uint16"},
{ACL_INT32, "int32"}, {ACL_UINT32, "uint32"}, {ACL_INT64, "int64"}, {ACL_UINT64, "uint64"},
{ACL_FLOAT16, "float16"}, {ACL_FLOAT, "float32"}, {ACL_DOUBLE, "float64"}, {ACL_BOOL, "bool"}};
const std::map<std::string, aclDataType> kStringTypeToAclType = []() -> std::map<std::string, aclDataType> {
std::map<std::string, aclDataType> ret;
for (const auto &[acl_type, type_str] : kAclTypeToString) {
ret.emplace(type_str, acl_type);
}
return ret;
}();
constexpr auto kUnknownErrorString = "Unknown error occurred";
std::map<void **, std::thread *> g_acl_handle_map = {};
constexpr auto kMbufHeadEndOfSequencePos = 128U;
constexpr auto kEndOfSequenceFlag = 0x5A;
constexpr auto kTransIdOffset = 64UL;
constexpr auto kSleepMilliSeconds = 500;
std::atomic<bool> g_acl_destroy_all = false;
bool GetAclDataType(const std::string &str_type, aclDataType *acl_type) {
MS_EXCEPTION_IF_NULL(acl_type);
auto iter = kStringTypeToAclType.find(str_type);
if (iter == kStringTypeToAclType.end()) {
MS_LOG(EXCEPTION) << "Invalid type " << str_type;
}
*acl_type = iter->second;
return true;
}
void CheckRtRetWithError(rtError_t error, const std::string &msg) {
if (error != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Rt error: " << msg << " | Error number: " << error;
}
}
void ReportErrorMessage() {
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
}
}
} // namespace
namespace tdt_handle {
void AddHandle(acltdtChannelHandle **handle, std::thread *use_thread) {
if (*handle != nullptr) {
auto ret = g_acl_handle_map.emplace(reinterpret_cast<void **>(handle), use_thread);
if (!std::get<1>(ret)) {
MS_LOG(ERROR) << "Failed to add new handle to acl_handle_map." << std::endl;
}
}
}
void DelHandle(acltdtChannelHandle **handle) {
void **void_handle = reinterpret_cast<void **>(handle);
g_acl_handle_map.erase(void_handle);
}
bool DestroyHandle() {
bool destroy_all = true;
for (auto &item : g_acl_handle_map) {
acltdtChannelHandle **handle = reinterpret_cast<acltdtChannelHandle **>(item.first);
if (*handle != nullptr) {
aclError stop_status = acltdtStopChannel(*handle);
if (stop_status != ACL_SUCCESS) {
MS_LOG(ERROR) << "Failed stop acl data channel and the stop status is " << stop_status << std::endl;
return false;
}
if (item.second != nullptr && item.second->joinable()) {
item.second->join();
}
if (acltdtDestroyChannel(*handle) != ACL_SUCCESS) {
MS_LOG(INFO) << "acltdtDestroyChannel failed.";
destroy_all = false;
} else {
*handle = nullptr;
}
}
}
// clear the map container when all the handles have been destroyed
if (destroy_all) {
g_acl_handle_map.clear();
g_acl_destroy_all = true;
}
return destroy_all;
}
bool IsClosed() { return g_acl_destroy_all; }
} // namespace tdt_handle
AscendDataQueueDynamic::AscendDataQueueDynamic(const size_t capacity)
: DataQueue(capacity), stream_(nullptr), node_info_(nullptr) {
auto context_key = device_context_->device_context_key();
@ -35,12 +132,12 @@ AscendDataQueueDynamic::AscendDataQueueDynamic(const size_t capacity)
stream_ = runtime_instance->compute_stream();
}
BlockQueueStatus_T AscendDataQueueDynamic::Push(std::vector<DataQueueItem> data) {
DataQueueStatus AscendDataQueueDynamic::Push(std::vector<DataQueueItem> data) {
for (size_t i = 0; i < data.size(); i++) {
auto &item = data[i];
if (item.data_ptr_ == nullptr) {
MS_LOG(ERROR) << "Invalid Input: ptr: " << item.data_ptr_ << ", len: " << item.data_len_;
return ERROR_INPUT;
return DataQueueStatus::ERROR_INPUT;
}
void *addr = device_context_->device_res_manager_->AllocateMemory(item.data_len_);
if (addr == nullptr) {
@ -52,26 +149,480 @@ BlockQueueStatus_T AscendDataQueueDynamic::Push(std::vector<DataQueueItem> data)
item.device_addr_ = addr;
}
CheckRtRetWithError(rtStreamSynchronize(stream_), "Call runtime rtStreamSynchronize failed");
node_info_[tail_].data_ = data;
node_info_[tail_].data_ = std::move(data);
tail_ = (tail_ + 1) % (capacity_);
++size_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T AscendDataQueueDynamic::Front(std::vector<DataQueueItem> *data) const {
DataQueueStatus AscendDataQueueDynamic::Front(std::vector<DataQueueItem> *data) const {
for (auto &item : node_info_[head_].data_) {
host_release_(item.data_ptr_, item.worker_id_);
}
*data = node_info_[head_].data_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T AscendDataQueueDynamic::Pop() {
DataQueueStatus AscendDataQueueDynamic::Pop() {
head_ = (head_ + 1) % (capacity_);
--size_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
bool AscendDataQueueDynamic::Destroy() { return true; }
AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(0), channel_name_(channel_name) {
// init ErrorManager, 0 means success
if (ErrorManager::GetInstance().Init() != 0) {
MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed.";
}
// get device id
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
device_id_ = MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_DEVICE_ID);
// create acl tdt handle
if (!channel_name.empty()) {
acl_handle_ = acltdtCreateChannel(device_id_, channel_name.c_str());
if (acl_handle_ == nullptr) {
MS_LOG(ERROR) << "Create channel for sending data failed, please check DEVICE ID setting, DEVICE ID that passed "
"into dataset(from context) and training process should be the same.";
ReportErrorMessage();
}
}
}
AscendTdtQueue::~AscendTdtQueue() {
if (acl_handle_ != nullptr) {
if (acltdtDestroyChannel(acl_handle_) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Failed to destroy channel for tdt queue.";
ReportErrorMessage();
} else {
tdt_handle::DelHandle(&acl_handle_);
acl_handle_ = nullptr;
}
}
}
bool AscendTdtQueue::Destroy() { return tdt_handle::DestroyHandle(); }
bool AscendTdtQueue::IsOpen() const { return !tdt_handle::IsClosed(); }
DataQueueStatus AscendTdtQueue::Push(std::vector<DataQueueItem> data) {
MS_LOG(DEBUG) << "TDT channel name is " << channel_name_ << ".";
acltdtDataset *acl_dataset = nullptr;
auto ret = Translate(data, &acl_dataset);
if (!ret) {
DestroyAclDataset(acl_dataset);
MS_LOG(ERROR) << "Converting into TDT tensor failed!";
return DataQueueStatus::INTERNAL_ERROR;
}
// Data prefetch only when PS mode enables cache.
if (acltdtGetDatasetSize(acl_dataset) > 0) {
acltdtDataItem *item0 = acltdtGetDataItem(acl_dataset, 0);
std::string item_type;
ParseType(acltdtGetDataTypeFromItem(item0), &item_type);
if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, acltdtGetDataAddrFromItem(item0),
acltdtGetDataSizeFromItem(item0), item_type)) {
MS_LOG(ERROR) << "PrefetchData failed in when pre-processing sending data.";
return DataQueueStatus::INTERNAL_ERROR;
}
}
auto status = acltdtSendTensor(acl_handle_, acl_dataset, -1);
DestroyAclDataset(acl_dataset);
if (status != ACL_SUCCESS) {
// if the device_queue thread had been interrupted by master, just print warning and return success
if (tdt_handle::IsClosed()) {
MS_LOG(WARNING) << "Device queue thread had been interrupted by TdtHandle::DestroyHandle, you can ignore "
<< "the above error: 'failed to send...'. In this scenario, the training ends first without "
<< "using all epoch(s) data, and the data preprocessing is blocked by the data "
<< "transmission channel on the device side. So we force the data transmission channel to stop.";
return DataQueueStatus::SUCCESS;
}
ReportErrorMessage();
MS_LOG(ERROR) << "Tdt Send data failed.";
return DataQueueStatus::INTERNAL_ERROR;
}
return DataQueueStatus::SUCCESS;
}
void AscendTdtQueue::ParseType(aclDataType acl_data_type, std::string *data_type) {
auto type_iter = kAclTypeToString.find(acl_data_type);
if (type_iter == kAclTypeToString.end()) {
MS_LOG(EXCEPTION) << "Got unsupported acl datatype: " << acl_data_type;
}
*data_type = type_iter->second;
}
bool AscendTdtQueue::Translate(const std::vector<DataQueueItem> &data, acltdtDataset **output_acl_dataset) {
auto acl_dataset = acltdtCreateDataset();
if (acl_dataset == nullptr) {
MS_LOG(ERROR) << "Create tdt dataset failed.";
return false;
}
bool status = AssembleTensor2AclDataset(data, acl_dataset);
if (!status) {
DestroyAclDataset(acl_dataset);
MS_LOG(ERROR) << "Assemble tensor row to tdt dataset failed.";
return false;
}
*output_acl_dataset = acl_dataset;
return true;
}
bool AscendTdtQueue::AssembleTensor2AclDataset(const std::vector<DataQueueItem> &data, acltdtDataset *acl_dataset) {
if (data.empty()) {
acltdtDataItem *acl_data =
acltdtCreateDataItem(acltdtTensorType::ACL_TENSOR_DATA_END_OF_SEQUENCE, nullptr, 0, ACL_BOOL, nullptr, 0);
if (acl_data == nullptr) {
MS_LOG(ERROR) << "Create data item failed when send empty data.";
return false;
}
if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_SUCCESS) {
if (acltdtDestroyDataItem(acl_data) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Destroy data item failed when send empty data.";
}
MS_LOG(ERROR) << "Add data item to tdt dataset failed when send data.";
return false;
}
return true;
}
for (const auto &ts : data) {
aclDataType acl_type;
acltdtDataItem *acl_data = nullptr;
if (!GetAclDataType(ts.data_type_, &acl_type)) {
MS_LOG(ERROR) << "Convert type " << ts.data_type_ << " to acl type failed.";
return false;
}
const auto &shape = ts.shapes_;
std::string shape_str = "[";
for (auto dim : shape) {
(void)shape_str.append(std::to_string(dim)).append(",");
}
shape_str.pop_back();
(void)shape_str.append("]");
void *data_ptr = ts.data_ptr_;
size_t data_size = ts.data_len_;
acl_data = acltdtCreateDataItem(acltdtTensorType::ACL_TENSOR_DATA_TENSOR, (shape.empty() ? nullptr : &shape[0]),
shape.size(), acl_type, data_ptr, data_size);
if (acl_data == nullptr) {
MS_LOG(ERROR) << "Create data item failed when send data.";
return false;
}
if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_SUCCESS) {
if (acltdtDestroyDataItem(acl_data) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Destroy data item failed when send data with type ACL_TENSOR_DATA_TENSOR.";
}
MS_LOG(INFO) << "Add data item to tdt dataset failed when send data.";
return false;
}
MS_LOG(DEBUG) << "TDT data type is TDT_TENSOR, tensor type is " << acl_type << ", tensor shape is " << shape_str
<< ", data length is " << data_size << ".";
}
return true;
}
void AscendTdtQueue::DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item) {
if (include_data_item) {
for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) {
if (acltdtDestroyDataItem(acltdtGetDataItem(acl_dataset, i)) != ACL_SUCCESS) {
MS_LOG(EXCEPTION) << "Destroy data item failed when send data.";
}
}
}
if (acltdtDestroyDataset(acl_dataset) != ACL_SUCCESS) {
MS_LOG(EXCEPTION) << "Destroy tdt dataset failed when send data.";
}
}
AscendHostQueue::AscendHostQueue(const std::string &channel_name)
: DataQueue(0), channel_name_(channel_name), queue_id_to_trans_id_map_(), queue_id_(0) {
// init ErrorManager, 0 means success
if (ErrorManager::GetInstance().Init() != 0) {
MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed.";
}
// get device id
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
device_id_ = MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_DEVICE_ID);
if (!HostQueueInit()) {
MS_LOG(WARNING) << "Host queue init failed.";
}
}
DataQueueStatus AscendHostQueue::Push(std::vector<DataQueueItem> data) {
if (!SendDataByHostQueue(data)) {
return DataQueueStatus::INTERNAL_ERROR;
}
return DataQueueStatus::SUCCESS;
}
bool AscendHostQueue::HostQueueInit() {
auto ret = rtSetDevice(device_id_);
if (ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtSetDevice failed, ret =" << ret;
return false;
}
ret = rtMemQueueInit(device_id_);
if (ret != ACL_RT_SUCCESS && ret != ACL_ERROR_RT_REPEATED_INIT) {
MS_LOG(ERROR) << "call rtMemQueueInit failed, ret =" << ret;
return false;
}
rtMemQueueAttr_t attr = {};
auto mem_ret = memset_s(attr.name, RT_MQ_MAX_NAME_LEN, 0, RT_MQ_MAX_NAME_LEN);
if (mem_ret != EOK) {
MS_LOG(ERROR) << "call memset_s failed, ret =" << mem_ret;
return false;
}
mem_ret = memcpy_s(attr.name, RT_MQ_MAX_NAME_LEN, channel_name_.c_str(), channel_name_.size() + 1);
if (mem_ret != EOK) {
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
return false;
}
attr.depth = 128U;
attr.workMode = RT_MQ_MODE_DEFAULT;
attr.flowCtrlFlag = false;
attr.flowCtrlDropTime = 0U;
attr.overWriteFlag = false;
ret = rtMemQueueCreate(device_id_, &attr, &queue_id_);
if (ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMemQueueCreate failed, ret =" << ret;
return false;
}
rtMemBuffCfg_t buff_cfg = {};
ret = rtMbufInit(&buff_cfg);
if (ret != ACL_RT_SUCCESS && ret != ACL_ERROR_RT_REPEATED_INIT) {
MS_LOG(ERROR) << "call rtMbufInit failed, ret =" << ret;
return false;
}
const std::lock_guard<std::mutex> lk(queue_id_to_trans_id_map_mutex_);
(void)queue_id_to_trans_id_map_.emplace(queue_id_, 0UL);
return true;
}
bool AscendHostQueue::SendDataByHostQueue(const std::vector<DataQueueItem> &data) {
bool status;
bool is_need_resend = false;
void *buff = nullptr;
if (!LaunchTensor2MBuff(data, &buff)) {
return false;
}
do {
if (is_need_resend) {
std::this_thread::sleep_for(std::chrono::milliseconds(kSleepMilliSeconds));
}
status = EnqueueData(buff, &is_need_resend);
} while (status && is_need_resend);
return status;
}
bool AscendHostQueue::SetTransId4MBuf(void **buff) {
void *head_buff = nullptr;
uint64_t head_size = 0UL;
auto ret = rtMbufGetPrivInfo(*buff, &head_buff, &head_size);
if (ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMbufGetPrivInfo failed, ret =" << ret;
return false;
}
uint64_t *trans_id = reinterpret_cast<uint64_t *>(static_cast<uint8_t *>(head_buff) + head_size - kTransIdOffset);
const std::lock_guard<std::mutex> lk(queue_id_to_trans_id_map_mutex_);
*trans_id = ++queue_id_to_trans_id_map_[queue_id_];
MS_LOG(DEBUG) << "host queue[" << queue_id_ << "] set trans id[" << *trans_id << "] success";
return true;
}
bool AscendHostQueue::LaunchTensor2MBuff(const std::vector<DataQueueItem> &data, void **buff) {
std::vector<DataItemInfo> items;
if (!CreateDataItemInfos(data, &items)) {
return false;
}
if (!SerializeDataItemInfos(&items, buff)) {
return false;
}
if (!SetTransId4MBuf(buff)) {
return false;
}
return true;
}
bool AscendHostQueue::EnqueueData(void *buff, bool *need_resend) {
MS_EXCEPTION_IF_NULL(need_resend);
*need_resend = false;
auto rt_error = rtSetDevice(device_id_);
if (rt_error != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtSetDevice device failed, ret=" << rt_error;
return false;
}
rt_error = rtMemQueueEnQueue(device_id_, queue_id_, buff);
if (rt_error == RT_ERROR_NONE) {
return true;
} else if (rt_error == ACL_ERROR_RT_QUEUE_FULL) {
*need_resend = true;
MS_LOG(DEBUG) << "queue[" << queue_id_ << "] is full, need call rtMemQueueEnQueue again";
} else {
HostQueueFreeBuff(buff);
MS_LOG(ERROR) << "host enqueue queue[" << queue_id_ << "] failed, ret = " << rt_error;
return false;
}
return true;
}
bool AscendHostQueue::CreateDataItemInfos(const std::vector<DataQueueItem> &data, std::vector<DataItemInfo> *items) {
MS_EXCEPTION_IF_NULL(items);
if (data.empty()) {
items->emplace_back(BuildDataItemInfo(ACL_TENSOR_DATA_END_OF_SEQUENCE, ACL_BOOL, nullptr, 0UL, nullptr, 0UL));
return true;
}
for (auto &ts : data) {
aclDataType acl_type;
if (!GetAclDataType(ts.data_type_, &acl_type)) {
MS_LOG(ERROR) << "Convert type " << ts.data_type_ << " to acl type failed.";
return false;
}
const auto &shape = ts.shapes_;
void *data_ptr = ts.data_ptr_;
size_t data_size = ts.data_len_;
if (ts.data_type_ != "string") {
items->emplace_back(BuildDataItemInfo(ACL_TENSOR_DATA_TENSOR, static_cast<int32_t>(acl_type),
(shape.empty() ? nullptr : &shape[0]), shape.size(), data_ptr, data_size));
} else {
MS_LOG(ERROR) << "Create data item failed when send data with type:" << ts.data_type_;
}
}
return true;
}
bool AscendHostQueue::SerializeDataItemInfos(std::vector<DataItemInfo> *items, void **buff) {
size_t cnt = items->size();
size_t total_size = 0UL;
for (size_t i = 0UL; i < cnt; ++i) {
(*items)[i].ctrl_info.cur_cnt = i;
(*items)[i].ctrl_info.cnt = cnt;
total_size +=
sizeof(DataItemInfo::ItemInfo) + (*items)[i].ctrl_info.dim_num * sizeof(int64_t) + (*items)[i].ctrl_info.data_len;
}
auto rt_error = rtMbufAlloc(buff, total_size);
if (rt_error != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMbufAlloc with size[" << total_size << "] failed, ret = " << rt_error;
return false;
}
void *data = nullptr;
rt_error = rtMbufGetBuffAddr(*buff, &data);
if (rt_error != ACL_RT_SUCCESS) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call rtMbufGetBuffAddr with size[" << total_size << "] failed, ret = " << rt_error;
return false;
}
void *head_buf = nullptr;
uint64_t head_size = 0UL;
rt_error = rtMbufGetPrivInfo(*buff, &head_buf, &head_size);
if (rt_error != ACL_RT_SUCCESS) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call rtMbufGetPrivInfo failed, ret =" << rt_error;
return false;
}
if ((head_buf != nullptr) && (head_size > kMbufHeadEndOfSequencePos)) {
MS_LOG(DEBUG) << "host queue set end_of_sequence mbuf head.";
}
size_t offset = 0UL;
for (size_t i = 0UL; i < cnt; ++i) {
auto mem_ret = memcpy_s(::ge::ValueToPtr(::ge::PtrToValue(data) + offset), sizeof(DataItemInfo::ItemInfo),
&((*items)[i].ctrl_info), sizeof(DataItemInfo::ItemInfo));
if (mem_ret != EOK) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
return false;
}
offset += sizeof(DataItemInfo::ItemInfo);
for (size_t j = 0UL; j < (*items)[i].ctrl_info.dim_num; ++j) {
mem_ret = memcpy_s(::ge::ValueToPtr(::ge::PtrToValue(data) + offset), sizeof(int64_t), &((*items)[i].dims[j]),
sizeof(int64_t));
if (mem_ret != EOK) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
return false;
}
offset += sizeof(int64_t);
}
if ((*items)[i].ctrl_info.data_len == 0UL) {
continue;
}
mem_ret = memcpy_s(::ge::ValueToPtr(::ge::PtrToValue(data) + offset), (*items)[i].ctrl_info.data_len,
(*items)[i].data_ptr, (*items)[i].ctrl_info.data_len);
if (mem_ret != EOK) {
(void)rtMbufFree(*buff);
MS_LOG(ERROR) << "call memcpy_s failed, ret =" << mem_ret;
return false;
}
offset += (*items)[i].ctrl_info.data_len;
}
return true;
}
AscendHostQueue::DataItemInfo AscendHostQueue::BuildDataItemInfo(acltdtTensorType acl_data_type, int32_t tensor_type,
const int64_t *dims, size_t dim_size, void *data_ptr,
uint64_t data_len) {
DataItemInfo item = {};
item.ctrl_info.data_type = static_cast<int32_t>(acl_data_type);
item.ctrl_info.tensor_type = tensor_type;
item.ctrl_info.dim_num = dim_size;
item.ctrl_info.data_len = data_len;
item.dims = std::vector<int64_t>(dims, dims + dim_size);
item.data_ptr = data_ptr;
return item;
}
void AscendHostQueue::HostQueueFreeBuff(void *buff) {
auto rt_error = rtMbufFree(buff);
if (rt_error != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "call rtMbufFree failed, ret=" << rt_error;
}
}
namespace {
std::shared_ptr<DataQueue> CreateAscendDataQueue(const std::string &channel_name, bool dynamic_shape, size_t capacity,
void *, const std::vector<size_t> &) {
if (dynamic_shape) {
return std::make_shared<AscendDataQueueDynamic>(capacity);
}
int32_t is_heterogeneous = 0;
(void)rtGetIsHeterogenous(&is_heterogeneous);
if (is_heterogeneous != 0) {
return std::make_shared<AscendHostQueue>(channel_name);
} else {
return std::make_shared<AscendTdtQueue>(channel_name);
}
}
REGISTER_DATA_QUEUE_CREATOR(kAscendDevice, CreateAscendDataQueue);
} // namespace
} // namespace device
} // namespace mindspore
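
CreateAscendDataQueue and the REGISTER_DATA_QUEUE_CREATOR call above are the heart of the refactor: each backend registers a creator keyed by its device name, and callers such as Task::Join go through DataQueueMgr::GetInstance().CreateDataQueue(kAscendDevice, ...) instead of linking the backend directly. A rough sketch of such a registry, with the creator signature taken from this file and everything else (class and member names) assumed rather than copied from DataQueueMgr:

#include <cstddef>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct DataQueue {  // stand-in for the real DataQueue base class
  virtual ~DataQueue() = default;
};

// Matches the creator signature used by CreateAscendDataQueue above.
using DataQueueCreator = std::function<std::shared_ptr<DataQueue>(
  const std::string &channel_name, bool dynamic_shape, size_t capacity, void *addr,
  const std::vector<size_t> &shape)>;

class DataQueueRegistry {
 public:
  static DataQueueRegistry &GetInstance() {
    static DataQueueRegistry instance;
    return instance;
  }
  void Register(const std::string &device_name, DataQueueCreator creator) {
    creators_[device_name] = std::move(creator);
  }
  std::shared_ptr<DataQueue> Create(const std::string &device_name, const std::string &channel_name,
                                    bool dynamic_shape, size_t capacity, void *addr,
                                    const std::vector<size_t> &shape) const {
    auto iter = creators_.find(device_name);
    if (iter == creators_.end()) {
      return nullptr;  // no backend plugin registered for this device target
    }
    return iter->second(channel_name, dynamic_shape, capacity, addr, shape);
  }

 private:
  std::map<std::string, DataQueueCreator> creators_;
};

// A REGISTER_DATA_QUEUE_CREATOR-style macro can then reduce to a static registration
// object that runs when the backend plugin is loaded:
#define SKETCH_REGISTER_DATA_QUEUE_CREATOR(device_name, creator)      \
  static const bool g_##creator##_registered = [] {                   \
    DataQueueRegistry::GetInstance().Register(device_name, creator);  \
    return true;                                                      \
  }()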

View File

@ -20,23 +20,27 @@
#include <unistd.h>
#include <memory>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include "runtime/hardware/device_context_manager.h"
#include "runtime/data_queue/data_queue.h"
#include "runtime/rt.h"
#include "include/backend/data_queue/data_queue.h"
#include "include/backend/visible.h"
#include "runtime/rt.h"
#include "acl/acl_tdt.h"
namespace mindspore {
namespace device {
class BACKEND_EXPORT AscendDataQueueDynamic : public DataQueue {
public:
explicit AscendDataQueueDynamic(const size_t capacity);
virtual ~AscendDataQueueDynamic() = default;
~AscendDataQueueDynamic() override = default;
BlockQueueStatus_T Push(std::vector<DataQueueItem> data);
BlockQueueStatus_T Front(std::vector<DataQueueItem> *data) const;
BlockQueueStatus_T Pop();
bool Destroy();
DataQueueStatus Push(std::vector<DataQueueItem> data) override;
DataQueueStatus Front(std::vector<DataQueueItem> *data) const override;
DataQueueStatus Pop() override;
bool Destroy() override;
void SetThreadDevice() override {}
private:
struct NodeInfo {
@ -45,7 +49,82 @@ class BACKEND_EXPORT AscendDataQueueDynamic : public DataQueue {
rtStream_t stream_;
std::unique_ptr<NodeInfo[]> node_info_;
};
namespace tdt_handle {
void AddHandle(acltdtChannelHandle **handle, std::thread *use_thread);
bool DestroyHandle();
void DelHandle(acltdtChannelHandle **handle);
bool IsClosed();
} // namespace tdt_handle
class AscendTdtQueue : public DataQueue {
public:
explicit AscendTdtQueue(const std::string &channel_name);
~AscendTdtQueue() override;
bool IsOpen() const override;
DataQueueStatus Push(std::vector<DataQueueItem> data) override;
DataQueueStatus Front(std::vector<DataQueueItem> *data) const override { return DataQueueStatus::SUCCESS; }
DataQueueStatus Pop() override { return DataQueueStatus::SUCCESS; }
bool Destroy() override;
void SetThreadDevice() override {}
private:
void DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item = true);
bool AssembleTensor2AclDataset(const std::vector<DataQueueItem> &data, acltdtDataset *acl_dataset);
void ParseType(aclDataType acl_data_type, std::string *data_type);
bool Translate(const std::vector<DataQueueItem> &data, acltdtDataset **output_acl_dataset);
acltdtChannelHandle *acl_handle_{nullptr};
std::string channel_name_;
uint32_t device_id_;
};
class AscendHostQueue : public DataQueue {
public:
explicit AscendHostQueue(const std::string &channel_name);
~AscendHostQueue() override = default;
DataQueueStatus Push(std::vector<DataQueueItem> data) override;
DataQueueStatus Front(std::vector<DataQueueItem> *data) const override { return DataQueueStatus::SUCCESS; }
DataQueueStatus Pop() override { return DataQueueStatus::SUCCESS; }
bool Destroy() override { return true; }
void SetThreadDevice() override {}
struct DataItemInfo {
struct ItemInfo {
int32_t version;
int32_t data_type;
uint32_t cur_cnt;
uint32_t cnt;
int32_t tensor_type;
uint32_t dim_num;
char reserved[32];
uint64_t data_len;
} ctrl_info;
std::vector<int64_t> dims;
void *data_ptr;
};
private:
bool HostQueueInit();
bool SendDataByHostQueue(const std::vector<DataQueueItem> &data);
bool SetTransId4MBuf(void **buff);
bool LaunchTensor2MBuff(const std::vector<DataQueueItem> &data, void **buff);
bool EnqueueData(void *buff, bool *need_resend);
bool CreateDataItemInfos(const std::vector<DataQueueItem> &data, std::vector<DataItemInfo> *items);
bool SerializeDataItemInfos(std::vector<DataItemInfo> *items, void **buff);
DataItemInfo BuildDataItemInfo(acltdtTensorType acl_data_type, int32_t tensor_type, const int64_t *dims,
size_t dim_size, void *data_ptr, uint64_t data_len);
void HostQueueFreeBuff(void *buff);
std::string channel_name_;
uint32_t device_id_;
std::mutex queue_id_to_trans_id_map_mutex_;
std::map<uint32_t, uint64_t> queue_id_to_trans_id_map_;
uint32_t queue_id_;
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_BLOCKING_QUEUE_H_
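
SerializeDataItemInfos packs each DataItemInfo above as a fixed ItemInfo header, followed by dim_num int64 dimensions, followed by data_len bytes of payload. As a worked example of the per-item arithmetic (a sketch assuming the usual LP64 padding, under which ItemInfo occupies 64 bytes), a float32 tensor of shape [2, 3] contributes 64 + 2 * 8 + 24 = 104 bytes to the mbuf:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const size_t item_info_size = 64;               // sizeof(DataItemInfo::ItemInfo), assumed LP64 layout
  const size_t dim_num = 2;                       // shape [2, 3]
  const size_t data_len = 2 * 3 * sizeof(float);  // 24 bytes of float32 payload
  const size_t total = item_info_size + dim_num * sizeof(int64_t) + data_len;
  std::printf("mbuf bytes for this item: %zu\n", total);  // 64 + 16 + 24 = 104
  return 0;
}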

View File

@ -62,10 +62,7 @@
#endif
#include "include/common/utils/config_manager.h"
#include "plugin/device/ascend/hal/hccl_adapter/hccl_adapter.h"
#ifdef ENABLE_TDTQUE
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_handle.h"
using mindspore::dataset::TdtHandle;
#endif
#include "plugin/device/ascend/hal/device/ascend_data_queue.h"
#ifdef ENABLE_DUMP_IR
#include "include/common/debug/rdr/recorder_manager.h"
#endif
@ -1102,10 +1099,10 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph &graph) {
#ifndef ENABLE_SECURITY
DumpTaskExceptionInfo(graph);
#endif
#ifdef ENABLE_TDTQUE
#ifdef WITH_BACKEND
// Run task error: call tdt_handle::DestroyHandle to release tdt and avoid DeviceQueueOp hostPush hanging
// case1: 100% cpu usage causes the thread/process to exit, but some tdt threads remain in the backend
if (!TdtHandle::DestroyHandle()) {
if (!tdt_handle::DestroyHandle()) {
MS_LOG(WARNING) << "Destroy tdt channel failed.";
} else {
MS_LOG(INFO) << "Destroy tdt channel success.";

View File

@ -27,7 +27,7 @@
#include "ir/dtype/type.h"
#include "acl/acl_tdt.h"
#include "proto/print.pb.h"
#include "minddata/dataset/engine/device_queue_impl/tdt/tdt_handle.h"
#include "plugin/device/ascend/hal/device/ascend_data_queue.h"
namespace py = pybind11;
namespace mindspore::device::ascend {
@ -392,7 +392,7 @@ void CreateTensorPrintThread(const PrintThreadCrt &ctr) {
<< MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_TSD_REF) << ".";
std::string print_file_path = MsContext::GetInstance()->get_param<std::string>(MS_CTX_PRINT_FILE_PATH);
g_acl_tdt_print = ctr(print_file_path, g_acl_handle);
dataset::TdtHandle::AddHandle(&g_acl_handle, &g_acl_tdt_print);
tdt_handle::AddHandle(&g_acl_handle, &g_acl_tdt_print);
}
void DestroyTensorPrintThread() {
@ -419,7 +419,7 @@ void DestroyTensorPrintThread() {
MS_LOG(ERROR) << "Failed destroy acl channel and the destroyed_status is " << destroyed_status << std::endl;
return;
}
dataset::TdtHandle::DelHandle(&g_acl_handle);
tdt_handle::DelHandle(&g_acl_handle);
MS_LOG(INFO) << "Succeed destroy acl channel";
}
} // namespace mindspore::device::ascend
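
The print thread above illustrates the tdt_handle lifecycle that replaces dataset::TdtHandle: register a freshly created channel (and optionally the thread draining it) with AddHandle, unregister it with DelHandle once it has been destroyed normally, and reserve DestroyHandle for the forced-shutdown paths in Task::Join and AscendKernelRuntime::RunTask. A condensed usage sketch, with error handling trimmed and the device id/channel name as placeholders:

#include <cstdint>
#include <string>
#include "acl/acl_tdt.h"
#include "plugin/device/ascend/hal/device/ascend_data_queue.h"

void TrackChannelExample(uint32_t device_id, const std::string &channel_name) {
  acltdtChannelHandle *handle = acltdtCreateChannel(device_id, channel_name.c_str());
  if (handle == nullptr) {
    return;
  }
  // Track the handle; a draining thread may be passed instead of nullptr so that
  // DestroyHandle can join it before tearing the channel down.
  mindspore::device::tdt_handle::AddHandle(&handle, nullptr);

  // Normal teardown: destroy the channel first, then unregister it.
  if (acltdtDestroyChannel(handle) == ACL_SUCCESS) {
    mindspore::device::tdt_handle::DelHandle(&handle);
    handle = nullptr;
  }

  // Forced teardown (the Task::Join / RunTask failure paths) would instead call
  // mindspore::device::tdt_handle::DestroyHandle() to stop, join and destroy every
  // tracked channel in one shot.
}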

View File

@ -297,12 +297,10 @@ bool AscendDeprecatedInterface::OpenTsd(const std::shared_ptr<MsContext> &ms_con
MS_LOG(EXCEPTION) << "Device " << device_id << " call rtSetDevice failed, ret[" << static_cast<int>(ret) << "]";
}
ms_context_ptr->increase_param<uint32_t>(MS_CTX_TSD_REF);
#ifdef ENABLE_TDTQUE
auto thread_crt = [](const std::string &path, const acltdtChannelHandle *acl_handle) {
return std::thread(TensorPrint(path, acl_handle));
};
CreateTensorPrintThread(thread_crt);
#endif
return true;
}
@ -315,10 +313,8 @@ bool AscendDeprecatedInterface::CloseTsd(const std::shared_ptr<MsContext> &ms_co
ms_context_ptr->decrease_param<uint32_t>(MS_CTX_TSD_REF);
if (force || ms_context_ptr->get_param<uint32_t>(MS_CTX_TSD_REF) == 0) {
ms_context_ptr->set_param<uint32_t>(MS_CTX_TSD_REF, 0);
#ifdef ENABLE_TDTQUE
pybind11::gil_scoped_release gil_release;
DestroyTensorPrintThread();
#endif
if (ErrorManager::GetInstance().Init() != 0) {
MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out.";
}

View File

@ -15,7 +15,7 @@
*/
#include "plugin/device/ascend/hal/hardware/ascend_device_res_manager.h"
#include "runtime/data_queue/data_queue_mgr.h"
#include "include/backend/data_queue/data_queue_mgr.h"
#include "runtime/rt.h"
namespace mindspore {

View File

@ -29,7 +29,7 @@
#include "acl/acl_rt.h"
#include "runtime/device/kernel_runtime.h"
#include "utils/ms_context.h"
#include "runtime/data_queue/data_queue_mgr.h"
#include "include/backend/data_queue/data_queue_mgr.h"
namespace mindspore {
namespace kernel {

View File

@ -15,28 +15,55 @@
*/
#include "plugin/device/gpu/hal/device/gpu_data_queue.h"
#include <string>
#include "plugin/device/gpu/hal/device/queue_common.h"
#include <utility>
#include "include/backend/data_queue/data_queue_mgr.h"
#include "utils/ms_context.h"
#include "plugin/device/gpu/hal/device/queue_common.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
namespace mindspore {
namespace device {
namespace {
void SetThreadToDeviceId(uint32_t device_id) {
// Without cudaSetDevice, CUDA memory is allocated on GPU:0 by default,
// which can overload that device in distributed scenarios.
auto ret = cudaSetDevice(device_id);
if (ret != cudaSuccess) {
MS_LOG(EXCEPTION) << "cudaSetDevice failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
}
}
std::shared_ptr<void> AllocHostMemory(size_t size) {
void *ptr;
auto ret = cudaHostAlloc(&ptr, size, cudaHostAllocDefault);
if (ret != cudaSuccess) {
MS_LOG(EXCEPTION) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
}
return std::shared_ptr<void>(ptr, [](void *ptr) -> void {
if (ptr != nullptr) {
(void)cudaFreeHost(ptr);
}
});
}
} // namespace
GpuDataQueueDynamic::GpuDataQueueDynamic(const size_t capacity) : DataQueue(capacity), node_info_(nullptr) {
node_info_ = std::make_unique<NodeInfo[]>(capacity);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
const std::string &device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
uint32_t device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
device_context_ = DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_target, device_id});
device_id_ = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
device_context_ = DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_target, device_id_});
device_context_->Initialize();
stream_ = reinterpret_cast<cudaStream_t>(gpu::GPUDeviceManager::GetInstance().default_stream());
}
BlockQueueStatus_T GpuDataQueueDynamic::Push(std::vector<DataQueueItem> data) {
DataQueueStatus GpuDataQueueDynamic::Push(std::vector<DataQueueItem> data) {
for (size_t i = 0; i < data.size(); i++) {
auto &item = data[i];
if (item.data_ptr_ == nullptr) {
MS_LOG(ERROR) << "Invalid Input: ptr: " << item.data_ptr_ << ", len: " << item.data_len_;
return ERROR_INPUT;
return DataQueueStatus::ERROR_INPUT;
}
void *addr = device_context_->device_res_manager_->AllocateMemory(item.data_len_);
CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(addr, item.data_ptr_, item.data_len_, cudaMemcpyHostToDevice, stream_),
@ -47,42 +74,49 @@ BlockQueueStatus_T GpuDataQueueDynamic::Push(std::vector<DataQueueItem> data) {
node_info_[tail_].event_.reset(new cudaEvent_t());
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&(*(node_info_[tail_].event_))), "Cuda Create Event Failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(*(node_info_[tail_].event_), stream_), "Cuda Create Event Failed");
node_info_[tail_].data_ = data;
node_info_[tail_].data_ = std::move(data);
tail_ = (tail_ + 1) % (capacity_);
++size_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T GpuDataQueueDynamic::Front(std::vector<DataQueueItem> *data) const {
DataQueueStatus GpuDataQueueDynamic::Front(std::vector<DataQueueItem> *data) const {
CHECK_CUDA_RET_WITH_ERROR(cudaEventSynchronize(*(node_info_[head_].event_)), "Cuda Event Syn Failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(*(node_info_[head_].event_)), "Cuda Destroy Event Failed");
for (auto &item : node_info_[head_].data_) {
host_release_(item.data_ptr_, item.worker_id_);
}
*data = node_info_[head_].data_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T GpuDataQueueDynamic::Pop() {
DataQueueStatus GpuDataQueueDynamic::Pop() {
head_ = (head_ + 1) % (capacity_);
--size_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
bool GpuDataQueueDynamic::Destroy() { return true; }
GpuQueue::GpuQueue(void *addr, const std::vector<size_t> &shape, const size_t &capacity)
void GpuDataQueueDynamic::SetThreadDevice() { SetThreadToDeviceId(device_id_); }
std::shared_ptr<void> GpuDataQueueDynamic::AllocHostMem(size_t size) { return AllocHostMemory(size); }
GpuQueue::GpuQueue(size_t capacity, void *addr, const std::vector<size_t> &shape)
: DataQueue(capacity), buffer_(addr), shape_(shape), len_(0), stream_(0), node_info_(nullptr) {
CHECK_CUDA_RET_WITH_ERROR(cudaStreamCreate(&stream_), "Cuda Create Stream Failed");
node_info_ = std::make_unique<NodeInfo[]>(capacity);
for (auto item : shape) {
len_ += item;
}
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
device_id_ = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
}
GpuQueue::~GpuQueue() { buffer_ = nullptr; }
BlockQueueStatus_T GpuQueue::Push(std::vector<DataQueueItem> data) {
DataQueueStatus GpuQueue::Push(std::vector<DataQueueItem> data) {
void *addr = reinterpret_cast<uint8_t *>(buffer_) + tail_ * len_;
for (size_t i = 0; i < data.size(); i++) {
auto &item = data[i];
@ -97,7 +131,7 @@ BlockQueueStatus_T GpuQueue::Push(std::vector<DataQueueItem> data) {
<< shape_[i] << "), "
<< "you need to call network.set_inputs() to "
"configure dynamic dims of input data before running the network";
return ERROR_INPUT;
return DataQueueStatus::ERROR_INPUT;
}
CHECK_CUDA_RET_WITH_ERROR(cudaMemcpyAsync(addr, item.data_ptr_, item.data_len_, cudaMemcpyHostToDevice, stream_),
@ -109,26 +143,26 @@ BlockQueueStatus_T GpuQueue::Push(std::vector<DataQueueItem> data) {
node_info_[tail_].event_.reset(new cudaEvent_t());
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&(*(node_info_[tail_].event_))), "Cuda Create Event Failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(*(node_info_[tail_].event_), stream_), "Cuda Create Event Failed");
node_info_[tail_].data_ = data;
node_info_[tail_].data_ = std::move(data);
tail_ = (tail_ + 1) % (capacity_);
++size_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T GpuQueue::Front(std::vector<DataQueueItem> *data) const {
DataQueueStatus GpuQueue::Front(std::vector<DataQueueItem> *data) const {
CHECK_CUDA_RET_WITH_ERROR(cudaEventSynchronize(*(node_info_[head_].event_)), "Cuda Event Syn Failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(*(node_info_[head_].event_)), "Cuda Destroy Event Failed");
for (auto &item : node_info_[head_].data_) {
host_release_(item.data_ptr_, item.worker_id_);
}
*data = node_info_[head_].data_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T GpuQueue::Pop() {
DataQueueStatus GpuQueue::Pop() {
head_ = (head_ + 1) % (capacity_);
--size_;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
bool GpuQueue::Destroy() {
@ -143,5 +177,22 @@ bool GpuQueue::Destroy() {
return true;
}
}
void GpuQueue::SetThreadDevice() { SetThreadToDeviceId(device_id_); }
std::shared_ptr<void> GpuQueue::AllocHostMem(size_t size) { return AllocHostMemory(size); }
namespace {
std::shared_ptr<DataQueue> CreateGpuDataQueue(const std::string &, bool dynamic_shape, size_t capacity, void *addr,
const std::vector<size_t> &shape) {
if (dynamic_shape) {
return std::make_shared<GpuDataQueueDynamic>(capacity);
}
return std::make_shared<GpuQueue>(capacity, addr, shape);
}
REGISTER_DATA_QUEUE_CREATOR(kGPUDevice, CreateGpuDataQueue);
} // namespace
} // namespace device
} // namespace mindspore
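The REGISTER_DATA_QUEUE_CREATOR call above is what replaces the old compile-time ENABLE_GPU/ENABLE_D selection: each backend plugin registers a factory keyed by its device-target string, and DataQueueMgr resolves it at runtime. A minimal sketch of how another backend could plug in, assuming a hypothetical MyDeviceQueue/MyDeviceQueueDynamic pair and a "MyDevice" target that are not part of this commit (whether the macro accepts a string literal rather than a constant like kGPUDevice is also an assumption):
// Hypothetical backend registration; MyDeviceQueue, MyDeviceQueueDynamic and "MyDevice" are illustrative only.
#include "include/backend/data_queue/data_queue_mgr.h"
namespace mindspore {
namespace device {
namespace {
// Same creator signature as CreateGpuDataQueue above:
// (channel_name, dynamic_shape, capacity, addr, shape) -> std::shared_ptr<DataQueue>
std::shared_ptr<DataQueue> CreateMyDeviceDataQueue(const std::string & /* channel_name */, bool dynamic_shape,
                                                   size_t capacity, void *addr, const std::vector<size_t> &shape) {
  if (dynamic_shape) {
    return std::make_shared<MyDeviceQueueDynamic>(capacity);
  }
  return std::make_shared<MyDeviceQueue>(capacity, addr, shape);
}
REGISTER_DATA_QUEUE_CREATOR("MyDevice", CreateMyDeviceDataQueue);
}  // namespace
}  // namespace device
}  // namespace mindspore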

View File

@ -22,7 +22,7 @@
#include <memory>
#include <vector>
#include <functional>
#include "runtime/data_queue/data_queue.h"
#include "include/backend/data_queue/data_queue.h"
#include "runtime/hardware/device_context_manager.h"
#include "include/backend/visible.h"
@ -31,12 +31,15 @@ namespace device {
class BACKEND_EXPORT GpuDataQueueDynamic : public DataQueue {
public:
explicit GpuDataQueueDynamic(const size_t capacity);
virtual ~GpuDataQueueDynamic() = default;
~GpuDataQueueDynamic() override = default;
BlockQueueStatus_T Push(std::vector<DataQueueItem> data);
BlockQueueStatus_T Front(std::vector<DataQueueItem> *data) const;
BlockQueueStatus_T Pop();
bool Destroy();
DataQueueStatus Push(std::vector<DataQueueItem> data) override;
DataQueueStatus Front(std::vector<DataQueueItem> *data) const override;
DataQueueStatus Pop() override;
bool Destroy() override;
void SetThreadDevice() override;
std::shared_ptr<void> AllocHostMem(size_t size) override;
private:
struct NodeInfo {
@ -48,17 +51,21 @@ class BACKEND_EXPORT GpuDataQueueDynamic : public DataQueue {
cudaStream_t stream_;
std::unique_ptr<NodeInfo[]> node_info_;
uint32_t device_id_;
};
class BACKEND_EXPORT GpuQueue : public DataQueue {
public:
GpuQueue(void *addr, const std::vector<size_t> &shape, const size_t &capacity);
virtual ~GpuQueue();
GpuQueue(size_t capacity, void *addr, const std::vector<size_t> &shape);
~GpuQueue() override;
BlockQueueStatus_T Push(std::vector<DataQueueItem> data);
BlockQueueStatus_T Front(std::vector<DataQueueItem> *data) const;
BlockQueueStatus_T Pop();
bool Destroy();
DataQueueStatus Push(std::vector<DataQueueItem> data) override;
DataQueueStatus Front(std::vector<DataQueueItem> *data) const override;
DataQueueStatus Pop() override;
bool Destroy() override;
void SetThreadDevice() override;
std::shared_ptr<void> AllocHostMem(size_t size) override;
private:
struct NodeInfo {
@ -73,6 +80,7 @@ class BACKEND_EXPORT GpuQueue : public DataQueue {
cudaStream_t stream_;
std::unique_ptr<NodeInfo[]> node_info_;
bool ds_detected_{false};
uint32_t device_id_;
};
} // namespace device
} // namespace mindspore
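SetThreadDevice() and AllocHostMem() are the two new hooks the host-side dataset code can reach instead of calling CUDA directly. A rough usage sketch on a static-shape GpuQueue; the capacity, byte lengths and device_buffer below are placeholders, not values from this commit:
// Placeholder sizes and buffer; real values come from the dataset pipeline.
#include <memory>
#include <vector>
#include "plugin/device/gpu/hal/device/gpu_data_queue.h"
void WorkerThreadBody(void *device_buffer) {
  const std::vector<size_t> feature_lens = {1024, 4096};  // bytes per output feature
  auto queue = std::make_shared<mindspore::device::GpuQueue>(/*capacity=*/2, device_buffer, feature_lens);
  queue->SetThreadDevice();                   // bind this host thread to the queue's device via cudaSetDevice
  auto host_buf = queue->AllocHostMem(1024);  // pinned host memory (cudaHostAlloc), released with cudaFreeHost
  // ... fill host_buf and hand it to Push() as a DataQueueItem ...
}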

View File

@ -21,7 +21,7 @@
#include "plugin/device/gpu/hal/device/gpu_device_address.h"
#include "plugin/device/gpu/hal/device/cuda_driver.h"
#include "plugin/device/gpu/hal/device/gpu_event.h"
#include "runtime/data_queue/data_queue_mgr.h"
#include "include/backend/data_queue/data_queue_mgr.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "plugin/device/gpu/hal/device/gpu_memory_allocator.h"
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"

View File

@ -26,7 +26,7 @@
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "plugin/device/gpu/hal/hardware/gpu_somas.h"
#include "runtime/data_queue/data_queue_mgr.h"
#include "include/backend/data_queue/data_queue_mgr.h"
#include "kernel/common_utils.h"
#include "plugin/device/gpu/hal/device/gpu_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"

View File

@ -17,7 +17,7 @@
#include "plugin/device/gpu/kernel/data/dataset_init_kernel.h"
#include "plugin/device/gpu/kernel/data/dataset_utils.h"
#include "kernel/common_utils.h"
#include "runtime/data_queue/data_queue_mgr.h"
#include "include/backend/data_queue/data_queue_mgr.h"
#include "plugin/device/gpu/hal/device/gpu_memory_allocator.h"
#include "include/common/utils/convert_utils.h"
@ -60,7 +60,7 @@ bool DatasetInitKernelMod::Launch(const std::vector<AddressPtr> &, const std::ve
}
auto status = DataQueueMgr::GetInstance().Create(queue_name_, addr, shapes_, buffer_q_capacity_);
if (status) {
if (status != device::DataQueueStatus::SUCCESS) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', init Dataset Failed. len: " << len << ", status:" << status;
}

View File

@ -27,7 +27,7 @@
#ifndef ENABLE_SECURITY
#include "plugin/device/gpu/hal/profiler/gpu_profiling.h"
#endif
#include "runtime/data_queue/data_queue_mgr.h"
#include "include/backend/data_queue/data_queue_mgr.h"
#include "plugin/device/gpu/hal/device/gpu_common.h"
#ifdef ENABLE_DUMP_IR
#include "include/common/debug/rdr/recorder_manager.h"
@ -113,7 +113,7 @@ bool DatasetIteratorKernelMod::ReadDevice(std::vector<DataQueueItem> *data) {
}
#endif
auto ret = DataQueueMgr::GetInstance().Front(queue_name_, data);
if (ret == device::SUCCESS) {
if (ret == device::DataQueueStatus::SUCCESS) {
#ifndef ENABLE_SECURITY
if (profiling_enable_) {
uint64_t end_time_stamp = profiling_op_->GetTimeStamp();
@ -123,7 +123,7 @@ bool DatasetIteratorKernelMod::ReadDevice(std::vector<DataQueueItem> *data) {
break;
}
if (ret == device::TIMEOUT) {
if (ret == device::DataQueueStatus::TIMEOUT) {
repeat++;
if (repeat < 10) {
MS_LOG(INFO) << "Waiting for data...(" << repeat << " / 10)";
@ -155,7 +155,7 @@ bool DatasetIteratorKernelMod::Launch(const std::vector<AddressPtr> &, const std
}
if (!is_opened_) {
auto ret = DataQueueMgr::GetInstance().OpenDynamicBufQueue(queue_name_);
if (ret != device::BlockQueueStatus_T::SUCCESS) {
if (ret != device::DataQueueStatus::SUCCESS) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', gpu Queue(" << queue_name_ << ") Open Failed: " << ret;
}
is_opened_ = true;

View File

@ -24,7 +24,7 @@
#include "plugin/device/gpu/kernel/data/dataset_profiling.h"
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "runtime/data_queue/blocking_queue.h"
#include "include/backend/data_queue/blocking_queue.h"
namespace mindspore {
namespace kernel {
using mindspore::device::DataQueueItem;

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "runtime/data_queue/blocking_queue.h"
#include "include/backend/data_queue/blocking_queue.h"
#include "utils/log_adapter.h"
namespace mindspore {
@ -22,60 +22,60 @@ namespace device {
const size_t kTimeout = 100;
void BlockingQueue::RegisterRelease(const std::function<void(void *, int32_t)> &func) { queue_->RegisterRelease(func); }
BlockQueueStatus_T BlockingQueue::Push(const std::vector<DataQueueItem> &data, unsigned int) {
DataQueueStatus BlockingQueue::Push(const std::vector<DataQueueItem> &data, unsigned int) {
std::unique_lock<std::mutex> locker(mutex_);
if (queue_->IsFull()) {
if (not_full_cond_.wait_for(locker, std::chrono::microseconds(kTimeout)) == std::cv_status::timeout) {
return TIMEOUT;
return DataQueueStatus::TIMEOUT;
}
}
auto ret = queue_->Push(data);
if (ret != SUCCESS) {
if (ret != DataQueueStatus::SUCCESS) {
return ret;
}
not_empty_cond_.notify_one();
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T BlockingQueue::Front(std::vector<DataQueueItem> *data) {
DataQueueStatus BlockingQueue::Front(std::vector<DataQueueItem> *data) {
std::unique_lock<std::mutex> locker(mutex_);
bool timeout = not_empty_cond_.wait_for(locker, std::chrono::seconds(30), [this] { return !queue_->IsEmpty(); });
if (!timeout) {
return TIMEOUT;
return DataQueueStatus::TIMEOUT;
}
return queue_->Front(data);
}
BlockQueueStatus_T BlockingQueue::Pop() {
DataQueueStatus BlockingQueue::Pop() {
std::unique_lock<std::mutex> locker(mutex_);
not_empty_cond_.wait(locker, [this] { return !queue_->IsEmpty(); });
auto ret = queue_->Pop();
if (ret != SUCCESS) {
if (ret != DataQueueStatus::SUCCESS) {
return ret;
}
not_full_cond_.notify_one();
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T BlockingQueue::Create(const std::shared_ptr<DataQueue> &data_queue) {
DataQueueStatus BlockingQueue::Create(const std::shared_ptr<DataQueue> &data_queue) {
this->queue_ = data_queue;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T BlockingQueue::Clear() {
DataQueueStatus BlockingQueue::Clear() {
std::unique_lock<std::mutex> locker(mutex_);
while (Size() > 0) {
std::vector<DataQueueItem> data;
auto ret = queue_->Front(&data);
if (ret != SUCCESS) {
if (ret != DataQueueStatus::SUCCESS) {
return ret;
}
ret = queue_->Pop();
if (ret != SUCCESS) {
if (ret != DataQueueStatus::SUCCESS) {
return ret;
}
}
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
bool BlockingQueue::Destroy() {

View File

@ -14,9 +14,11 @@
* limitations under the License.
*/
#include "runtime/data_queue/data_queue.h"
#include "include/backend/data_queue/data_queue.h"
#include <string>
#include "utils/ms_context.h"
#include "runtime/hardware/device_context_manager.h"
namespace mindspore {
namespace device {
DataQueue::DataQueue(const size_t capacity)

View File

@ -14,18 +14,16 @@
* limitations under the License.
*/
#include "runtime/data_queue/data_queue_mgr.h"
#if ENABLE_GPU
#include "plugin/device/gpu/hal/device/gpu_data_queue.h"
#elif ENABLE_D
#include "plugin/device/ascend/hal/device/ascend_data_queue.h"
#endif
#include "include/backend/data_queue/data_queue_mgr.h"
#include <algorithm>
#include <utility>
#include "utils/log_adapter.h"
#include "utils/ms_utils.h"
#include "utils/ms_context.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include "include/common/utils/anfalgo.h"
#include "include/backend/data_queue/blocking_queue.h"
#include "runtime/device/kernel_info.h"
namespace py = pybind11;
@ -36,136 +34,151 @@ DataQueueMgr &DataQueueMgr::GetInstance() noexcept {
return instance;
}
BlockQueueStatus_T DataQueueMgr::Create(const std::string &channel_name, void *addr, const std::vector<size_t> &shape,
const size_t &capacity) {
void DataQueueMgr::RegisterDataQueueCreator(const std::string &device_name, DataQueueCreator &&creator) {
data_queue_creator_map_.emplace(device_name, std::forward<DataQueueCreator>(creator));
}
std::shared_ptr<DataQueue> DataQueueMgr::CreateDataQueue(const std::string &device_name,
const std::string &channel_name, bool dynamic_shape,
size_t capacity, void *addr,
const std::vector<size_t> &shape) {
auto iter = data_queue_creator_map_.find(device_name);
if (iter == data_queue_creator_map_.end()) {
return nullptr;
}
return iter->second(channel_name, dynamic_shape, capacity, addr, shape);
}
DataQueueStatus DataQueueMgr::Create(const std::string &channel_name, void *addr, const std::vector<size_t> &shape,
const size_t &capacity) {
MS_LOG(INFO) << "Static GPU queue: " << channel_name << " created";
if (name_queue_map_.find(channel_name) != name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue already exist: " << channel_name;
return QUEUE_EXIST;
return DataQueueStatus::QUEUE_EXIST;
}
#if ENABLE_GPU
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
std::shared_ptr<DataQueue> gpu_queue = std::make_shared<GpuQueue>(addr, shape, capacity);
BlockQueueStatus_T rt = queue->Create(gpu_queue);
if (rt != SUCCESS) {
MS_LOG(ERROR) << "Queue: " << channel_name << "create failed: " << rt;
return rt;
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
std::shared_ptr<DataQueue> data_queue = CreateDataQueue(
MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET), channel_name, false, capacity, addr, shape);
if (data_queue != nullptr) {
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
DataQueueStatus rt = queue->Create(data_queue);
if (rt != DataQueueStatus::SUCCESS) {
MS_LOG(ERROR) << "Queue: " << channel_name << "create failed: " << rt;
return rt;
}
(void)name_queue_map_.insert(std::make_pair(channel_name, queue));
init_ = true;
return DataQueueStatus::SUCCESS;
}
(void)name_queue_map_.insert(std::make_pair(channel_name, queue));
init_ = true;
return SUCCESS;
#else
MS_LOG(ERROR) << "Static data queue only support GPU target.";
return INTERNAL_ERROR;
#endif
return DataQueueStatus::INTERNAL_ERROR;
}
BlockQueueStatus_T DataQueueMgr::Open(const std::string &channel_name,
const std::function<void(void *, int32_t)> func) {
DataQueueStatus DataQueueMgr::Open(const std::string &channel_name, const std::function<void(void *, int32_t)> func) {
MS_LOG(INFO) << "Gpu queue: " << channel_name << " open.";
if (name_queue_map_.find(channel_name) == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return QUEUE_NOT_EXIST;
return DataQueueStatus::QUEUE_NOT_EXIST;
}
name_queue_map_[channel_name]->RegisterRelease(func);
open_by_dataset_++;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T DataQueueMgr::OpenDynamicBufQueue(const std::string &channel_name,
const std::function<void(void *, int32_t)> func) {
DataQueueStatus DataQueueMgr::OpenDynamicBufQueue(const std::string &channel_name,
const std::function<void(void *, int32_t)> func) {
std::unique_lock<std::mutex> locker(mutex_);
if (name_queue_map_.find(channel_name) == name_queue_map_.end()) {
BlockQueueStatus_T status = CreateDynamicBufQueue(channel_name, default_capacity_);
MS_EXCEPTION_IF_CHECK_FAIL(status == BlockQueueStatus_T::SUCCESS, "Create dynamic buffer queue failed");
DataQueueStatus status = CreateDynamicBufQueue(channel_name, default_capacity_);
MS_EXCEPTION_IF_CHECK_FAIL(status == DataQueueStatus::SUCCESS, "Create dynamic buffer queue failed");
MS_LOG_INFO << "Create dynamic buffer queue: " << channel_name;
cv_.notify_all();
}
name_queue_map_[channel_name]->RegisterRelease(func);
open_by_dataset_++;
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T DataQueueMgr::OpenDynamicBufQueue(const std::string &channel_name) {
DataQueueStatus DataQueueMgr::OpenDynamicBufQueue(const std::string &channel_name) {
std::unique_lock<std::mutex> locker(mutex_);
auto time_out = cv_.wait_for(locker, std::chrono::seconds(MAX_WAIT_TIME_IN_SEC),
[this, &channel_name] { return name_queue_map_.count(channel_name); });
if (!time_out) {
return TIMEOUT;
return DataQueueStatus::TIMEOUT;
}
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T DataQueueMgr::CreateDynamicBufQueue(const std::string &channel_name, const size_t &capacity) {
DataQueueStatus DataQueueMgr::CreateDynamicBufQueue(const std::string &channel_name, const size_t &capacity) {
if (name_queue_map_.find(channel_name) != name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue already exist: " << channel_name;
return QUEUE_EXIST;
return DataQueueStatus::QUEUE_EXIST;
}
#if ENABLE_GPU
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
std::shared_ptr<DataQueue> device_queue = std::make_shared<GpuDataQueueDynamic>(capacity);
#elif ENABLE_D
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
std::shared_ptr<DataQueue> device_queue = std::make_shared<AscendDataQueueDynamic>(capacity);
#else
MS_LOG(ERROR) << "Dynamic data queue only support Ascend/GPU target.";
return QUEUE_EXIST;
#endif
#if ENABLE_GPU || ENABLE_D
BlockQueueStatus_T rt = queue->Create(device_queue);
if (rt != SUCCESS) {
MS_LOG(ERROR) << "Queue: " << channel_name << "create failed: " << rt;
return rt;
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
std::string device_name = MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET);
std::shared_ptr<DataQueue> device_queue = CreateDataQueue(device_name, channel_name, true, capacity, nullptr, {});
if (device_queue != nullptr && (device_name == kGPUDevice || device_name == kAscendDevice)) {
std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
DataQueueStatus rt = queue->Create(device_queue);
if (rt != DataQueueStatus::SUCCESS) {
MS_LOG(ERROR) << "Queue: " << channel_name << "create failed: " << rt;
return rt;
}
(void)name_queue_map_.insert(std::make_pair(channel_name, queue));
init_ = true;
return DataQueueStatus::SUCCESS;
}
(void)name_queue_map_.insert(std::make_pair(channel_name, queue));
init_ = true;
return SUCCESS;
#endif
MS_LOG(ERROR) << "Dynamic data queue only support Ascend/GPU target, bug got " << device_name;
return DataQueueStatus::QUEUE_EXIST;
}
BlockQueueStatus_T DataQueueMgr::Open(const std::string &channel_name) const {
DataQueueStatus DataQueueMgr::Open(const std::string &channel_name) const {
if (name_queue_map_.find(channel_name) == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return QUEUE_NOT_EXIST;
return DataQueueStatus::QUEUE_NOT_EXIST;
}
return SUCCESS;
return DataQueueStatus::SUCCESS;
}
BlockQueueStatus_T DataQueueMgr::Push(const std::string &channel_name, const std::vector<DataQueueItem> &data,
unsigned int timeout_in_sec) {
DataQueueStatus DataQueueMgr::Push(const std::string &channel_name, const std::vector<DataQueueItem> &data,
unsigned int timeout_in_sec) {
auto iter = name_queue_map_.find(channel_name);
if (iter == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return QUEUE_NOT_EXIST;
return DataQueueStatus::QUEUE_NOT_EXIST;
}
return iter->second->Push(data, timeout_in_sec);
}
BlockQueueStatus_T DataQueueMgr::Front(const std::string &channel_name, std::vector<DataQueueItem> *data) {
DataQueueStatus DataQueueMgr::Front(const std::string &channel_name, std::vector<DataQueueItem> *data) {
auto iter = name_queue_map_.find(channel_name);
if (iter == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return QUEUE_NOT_EXIST;
return DataQueueStatus::QUEUE_NOT_EXIST;
}
return iter->second->Front(data);
}
BlockQueueStatus_T DataQueueMgr::Pop(const std::string &channel_name) {
DataQueueStatus DataQueueMgr::Pop(const std::string &channel_name) {
auto iter = name_queue_map_.find(channel_name);
if (iter == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return QUEUE_NOT_EXIST;
return DataQueueStatus::QUEUE_NOT_EXIST;
}
return iter->second->Pop();
}
BlockQueueStatus_T DataQueueMgr::Clear(const std::string &channel_name) {
DataQueueStatus DataQueueMgr::Clear(const std::string &channel_name) {
auto iter = name_queue_map_.find(channel_name);
if (iter == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return QUEUE_NOT_EXIST;
return DataQueueStatus::QUEUE_NOT_EXIST;
}
return iter->second->Clear();
@ -238,11 +251,39 @@ size_t DataQueueMgr::Capacity(const std::string &channel_name) {
return name_queue_map_.at(channel_name)->Capacity();
}
std::shared_ptr<DataQueue> DataQueueMgr::GetDataQueue(const std::string &channel_name) const {
auto iter = name_queue_map_.find(channel_name);
if (iter == name_queue_map_.end()) {
MS_LOG(ERROR) << "Queue not exist " << channel_name;
return nullptr;
}
MS_EXCEPTION_IF_NULL(iter->second);
return iter->second->Queue();
}
DataQueueStatus DataQueueMgr::SetThreadDevice(const std::string &device_name) {
auto tmp_queue = CreateDataQueue(device_name, {}, false, 0, nullptr, {});
if (tmp_queue == nullptr) {
return DataQueueStatus::QUEUE_NOT_EXIST;
}
tmp_queue->SetThreadDevice();
return DataQueueStatus::SUCCESS;
}
std::shared_ptr<void> DataQueueMgr::AllocHostMem(const std::string &device_name, size_t size) {
auto tmp_queue = CreateDataQueue(device_name, {}, false, 0, nullptr, {});
if (tmp_queue == nullptr) {
return std::shared_ptr<void>(::malloc(size), ::free);
}
return tmp_queue->AllocHostMem(size);
}
#ifndef BUILD_LITE
bool PopDataFromDataQueue(const AnfNodePtr &data_kernel) {
auto queue_name = common::AnfAlgo::GetNodeAttr<std::string>(data_kernel, "shared_name");
device::DataQueueMgr &buf_mgr = device::DataQueueMgr::GetInstance();
auto ret = buf_mgr.OpenDynamicBufQueue(queue_name);
MS_EXCEPTION_IF_CHECK_FAIL(ret == device::BlockQueueStatus_T::SUCCESS, "Open dynamic data queue failed");
MS_EXCEPTION_IF_CHECK_FAIL(ret == device::DataQueueStatus::SUCCESS, "Open dynamic data queue failed");
std::vector<device::DataQueueItem> data;
auto kernel_info = dynamic_cast<device::KernelInfo *>(data_kernel->kernel_info());
(void)buf_mgr.Front(queue_name, &data);
@ -271,29 +312,6 @@ bool PopDataFromDataQueue(const AnfNodePtr &data_kernel) {
common::AnfAlgo::SetOutputInferTypeAndShape(types, shapes, data_kernel.get());
return true;
}
namespace {
struct DataQueueHandlerRegister {
DataQueueHandlerRegister() noexcept {
DataQueueHandler::SetOpenHandler(
[](const std::string channel_name, const std::function<void(void *, int32_t)> func) -> BlockQueueStatus_T {
return DataQueueMgr::GetInstance().Open(channel_name, func);
});
DataQueueHandler::SetOpenDynamicBufQueueHandler(
[](const std::string &channel_name, const std::function<void(void *, int32_t)> func) -> BlockQueueStatus_T {
return DataQueueMgr::GetInstance().OpenDynamicBufQueue(channel_name, func);
});
DataQueueHandler::SetIsClosedHandler([]() -> bool { return DataQueueMgr::GetInstance().IsClosed(); });
DataQueueHandler::SetCloseConfirmHandler([]() { DataQueueMgr::GetInstance().CloseConfirm(); });
DataQueueHandler::SetCloseHandler([](const std::string &channel) { DataQueueMgr::GetInstance().Close(channel); });
DataQueueHandler::SetClearHandler([](const std::string &channel) -> device::BlockQueueStatus_T {
return DataQueueMgr::GetInstance().Clear(channel);
});
DataQueueHandler::SetPushHandler([](const std::string &channel, const std::vector<device::DataQueueItem> &items,
unsigned int timeout) -> device::BlockQueueStatus_T {
return DataQueueMgr::GetInstance().Push(channel, items, timeout);
});
}
} callback_register;
} // namespace
#endif
} // namespace device
} // namespace mindspore
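Since the manager now drives everything through the registered creators, the producer-side call sequence is the same for every backend. A hedged sketch of that flow; "example_channel", dev_addr, host_ptr and the byte sizes are stand-ins, and error handling is reduced to status checks:
// Hypothetical producer flow; channel name, buffers and sizes are stand-ins, not part of this commit.
#include "include/backend/data_queue/data_queue_mgr.h"
using mindspore::device::DataQueueItem;
using mindspore::device::DataQueueMgr;
using mindspore::device::DataQueueStatus;
bool SetUpChannel(void *dev_addr) {
  auto &mgr = DataQueueMgr::GetInstance();
  // Create() builds the backend queue through the creator registered for the current device target.
  if (mgr.Create("example_channel", dev_addr, {1024, 4096}, /*capacity=*/2) != DataQueueStatus::SUCCESS) {
    return false;
  }
  // The callback hands host buffers back once a row is consumed (it is invoked from Front()).
  return mgr.Open("example_channel", [](void *addr, int32_t worker_id) { /* recycle host buffer */ }) ==
         DataQueueStatus::SUCCESS;
}
bool SendOneRow(void *host_ptr, size_t len) {
  DataQueueItem item;
  item.data_ptr_ = host_ptr;
  item.data_len_ = len;
  auto status = DataQueueMgr::GetInstance().Push("example_channel", {item}, /*timeout_in_sec=*/30);
  return status == DataQueueStatus::SUCCESS;  // TIMEOUT when the queue stays full, QUEUE_NOT_EXIST if never created
}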

View File

@ -1,131 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_DATA_QUEUE_MGR_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_DATA_QUEUE_MGR_H_
#include <unistd.h>
#include <iostream>
#include <functional>
#include <map>
#include <vector>
#include <string>
#include <memory>
#include "runtime/data_queue/blocking_queue.h"
#define EXPORT __attribute__((visibility("default")))
namespace mindspore {
namespace device {
static const unsigned int MAX_WAIT_TIME_IN_SEC = 60;
class Semaphore {
public:
explicit Semaphore(int count = 0) : count_(count) {}
inline void Signal() {
std::unique_lock<std::mutex> lock(mutex_);
++count_;
cv_.notify_one();
}
inline bool Wait() {
std::unique_lock<std::mutex> lock(mutex_);
while (count_ == 0) {
if (cv_.wait_for(lock, std::chrono::seconds(MAX_WAIT_TIME_IN_SEC)) == std::cv_status::timeout) {
return false;
}
}
--count_;
return true;
}
private:
std::mutex mutex_;
std::condition_variable cv_;
int count_;
};
class DataQueueMgr {
public:
EXPORT DataQueueMgr() : cur_dev_id_(0), init_(false), closed_(false), open_by_dataset_(0) {}
EXPORT virtual ~DataQueueMgr() = default;
EXPORT static DataQueueMgr &GetInstance() noexcept;
EXPORT BlockQueueStatus_T Create(const std::string &channel_name, void *addr, const std::vector<size_t> &shape,
const size_t &capacity);
// call for Push thread
EXPORT BlockQueueStatus_T Open(const std::string &channel_name, std::function<void(void *, int32_t)> func);
// call for Front/Pop thread
EXPORT BlockQueueStatus_T Open(const std::string &channel_name) const;
EXPORT BlockQueueStatus_T Push(const std::string &channel_name, const std::vector<DataQueueItem> &data,
unsigned int timeout_in_sec);
EXPORT BlockQueueStatus_T Front(const std::string &channel_name, std::vector<DataQueueItem> *data);
EXPORT BlockQueueStatus_T Pop(const std::string &channel_name);
EXPORT BlockQueueStatus_T Clear(const std::string &channel_name);
EXPORT BlockQueueStatus_T OpenDynamicBufQueue(const std::string &channel_name,
const std::function<void(void *, int32_t)> func);
EXPORT BlockQueueStatus_T CreateDynamicBufQueue(const std::string &channel_name, const size_t &capacity);
EXPORT BlockQueueStatus_T OpenDynamicBufQueue(const std::string &channel_name);
EXPORT void Close(const std::string &channel_name) const noexcept;
EXPORT bool IsInit() const;
EXPORT bool IsClosed() const;
EXPORT bool Destroy();
// call for Release GPU Resources
EXPORT bool CloseNotify();
// call for dataset send thread
EXPORT void CloseConfirm();
EXPORT size_t Size(const std::string &channel_name);
EXPORT size_t Capacity(const std::string &channel_name);
private:
int cur_dev_id_;
bool init_;
bool closed_;
std::mutex mutex_;
std::mutex close_mutex_;
std::condition_variable cv_;
// how many queues opened by dataset
int open_by_dataset_;
Semaphore sema;
bool dynamic_shape_{false};
size_t default_capacity_{2};
std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
inline bool isCreated(const std::string &channel_name) const;
DataQueueMgr(const DataQueueMgr &) = delete;
DataQueueMgr &operator=(const DataQueueMgr &) = delete;
};
bool PopDataFromDataQueue(const AnfNodePtr &data_kernel);
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_BUFFER_MGR_H_

View File

@ -11,10 +11,5 @@ if("${ENABLE_HIDDEN}" STREQUAL "OFF")
string(REPLACE " -fvisibility=hidden" " -fvisibility=default" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
if(ENABLE_TDTQUE)
file(GLOB_RECURSE TDT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../minddata/dataset/engine/tdt/tdt_handle.cc")
endif()
set_property(SOURCE ${DEVICE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
add_library(_mindspore_runtime_device_obj OBJECT ${DEVICE_SRC_LIST} ${TDT_SRC_LIST})
add_library(_mindspore_runtime_device_obj OBJECT ${DEVICE_SRC_LIST})

View File

@ -1,20 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils/data_queue_handler.h"
namespace {
// empty source file trick to avoid symbol ex/import problem on Windows
} // namespace

View File

@ -1,50 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_DATA_QUEUE_HANDLER_H_
#define MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_DATA_QUEUE_HANDLER_H_
#include <string>
#include <utility>
#include <functional>
#include <vector>
#include "utils/macros.h"
#include "utils/callback_handler.h"
namespace mindspore {
namespace device {
enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_EXIST, QUEUE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };
struct DataQueueItem {
int32_t worker_id_{0};
std::string data_type_;
size_t data_len_{0};
void *data_ptr_{nullptr};
std::vector<int64_t> shapes_;
void *device_addr_{nullptr};
};
} // namespace device
class MS_EXPORT DataQueueHandler {
HANDLER_DEFINE(device::BlockQueueStatus_T, OpenDynamicBufQueue, const std::string &,
const std::function<void(void *, int32_t)>);
HANDLER_DEFINE(device::BlockQueueStatus_T, Open, const std::string &, const std::function<void(void *, int32_t)>);
HANDLER_DEFINE(bool, IsClosed);
HANDLER_DEFINE(void, CloseConfirm);
HANDLER_DEFINE(void, Close, const std::string &);
HANDLER_DEFINE(device::BlockQueueStatus_T, Clear, const std::string &);
HANDLER_DEFINE(device::BlockQueueStatus_T, Push, const std::string &, const std::vector<device::DataQueueItem> &,
unsigned int);
};
} // namespace mindspore
#endif // MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_DATA_QUEUE_HANDLER_H_
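The scoped DataQueueStatus enum that supersedes BlockQueueStatus_T is not shown in this excerpt; judging from the values used throughout the commit, its shape is roughly the following (the authoritative definition presumably lives in include/backend/data_queue/data_queue.h):
// Reconstructed from usage in this commit; consult include/backend/data_queue/data_queue.h for the real definition.
enum class DataQueueStatus : int { SUCCESS = 0, QUEUE_EXIST, QUEUE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };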

View File

@ -39,7 +39,6 @@ constexpr auto kSplitLine = "\n-------------------------------------------------
#if defined(__ANDROID__) || defined(ANDROID)
constexpr const char *ANDROID_LOG_TAG = "MS_LITE";
#endif
std::map<void **, std::thread *> acl_handle_map;
// set default log level to WARNING for all sub modules
int g_ms_submodule_log_levels[NUM_SUBMODUES] = {WARNING};
#if defined(_WIN32) || defined(_WIN64)

View File

@ -47,7 +47,6 @@ static constexpr size_t GetRelPathPos() noexcept {
}
namespace mindspore {
/// \brief The handler map for ACL.
MS_CORE_API extern std::map<void **, std::thread *> acl_handle_map;
#define FILE_NAME \
(sizeof(__FILE__) > GetRelPathPos() ? static_cast<const char *>(__FILE__) + GetRelPathPos() \
: static_cast<const char *>(__FILE__))

View File

@ -53,7 +53,7 @@ TEST_F(TestCompileSegmentRunner, test_MsVmConvert1) {
std::shared_ptr<mindspore::FuncGraphManager> manager = mindspore::Manage(g);
BackendPtr b = std::make_shared<Backend>("vm");
auto graph_partition = std::make_shared<GraphPartition>(nonlinear_ops, b->name());
auto graph_partition = std::make_shared<GraphPartition>(GetNonlinearOps(), b->name());
auto segments = graph_partition->Partition(g);
VectorRef args({1.0, 2.0});
@ -67,7 +67,7 @@ TEST_F(TestCompileSegmentRunner, test_MsVmConvert2) {
std::shared_ptr<mindspore::FuncGraphManager> manager = mindspore::Manage(g);
BackendPtr b = std::make_shared<Backend>("vm");
auto graph_partition = std::make_shared<GraphPartition>(nonlinear_ops, b->name());
auto graph_partition = std::make_shared<GraphPartition>(GetNonlinearOps(), b->name());
auto segments = graph_partition->Partition(g);
VectorRef args({1.0, 2.0});
@ -81,7 +81,7 @@ TEST_F(TestCompileSegmentRunner, test_if) {
std::shared_ptr<mindspore::FuncGraphManager> manager = mindspore::Manage(g);
BackendPtr b = std::make_shared<Backend>("vm");
auto graph_partition = std::make_shared<GraphPartition>(nonlinear_ops, b->name());
auto graph_partition = std::make_shared<GraphPartition>(GetNonlinearOps(), b->name());
auto segments = graph_partition->Partition(g);
VectorRef args({1.0, 2.0});