forked from mindspore-Ecosystem/mindspore
Dynamic memory pool: add classification statistics info
This commit is contained in:
parent
384e6ca851
commit
8c32aa2c7e
|
@ -32,8 +32,15 @@ const size_t kMinimumAllocMem = 10 << 20;
|
|||
thread_local AllocatorDebugInfo DynamicMemAllocatorDebugInfo::debug_info_;
|
||||
|
||||
// Label of each memory buf status; looked up with .at() when DumpDynamicMemPoolDebugInfo logs a mem buf.
static const std::map<DynamicMemBufStatus, std::string> kBufStatusString = {
|
||||
{kMemBufIdle, "idle"},
|
||||
{kMemBufUsed, "used"},
|
||||
{DynamicMemBufStatus::kMemBufIdle, "idle"},
|
||||
{DynamicMemBufStatus::kMemBufUsed, "used"},
|
||||
};
|
||||
|
||||
// Label of each allocator type; looked up with .at() when DumpDynamicMemPoolDebugInfo logs a mem buf,
// so every AllocatorType enumerator needs an entry here.
static const std::map<AllocatorType, std::string> kAllocatorTypeString = {
|
||||
{AllocatorType::kWeight, "weight"},
|
||||
{AllocatorType::kConstantValue, "constant value"},
|
||||
{AllocatorType::kKernelOutput, "kernel output"},
|
||||
{AllocatorType::kOther, "other"},
|
||||
};
|
||||
|
||||
DynamicMemPoolBestFit::~DynamicMemPoolBestFit() {
|
||||
|
@ -90,8 +97,9 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t
|
|||
DynamicMemBufPtr continuous_mem_buf;
|
||||
auto buf_addr = device_addr;
|
||||
for (size_t i : size_list) {
|
||||
continuous_mem_buf =
|
||||
std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, i, DynamicMemAllocatorDebugInfo::GetDebugInfo().name_);
|
||||
continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, DynamicMemBufStatus::kMemBufUsed, i,
|
||||
DynamicMemAllocatorDebugInfo::GetDebugInfo().name_,
|
||||
DynamicMemAllocatorDebugInfo::GetDebugInfo().type_);
|
||||
MS_EXCEPTION_IF_NULL(continuous_mem_buf);
|
||||
(void)mem_block->block_all_mem_buf_map_.emplace(buf_addr, continuous_mem_buf);
|
||||
device_addr_list.emplace_back(buf_addr);
|
||||
|
@ -119,13 +127,14 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size, bool from_persis
|
|||
if (iter != mem_mng->idle_mem_buf_map_.end()) {
|
||||
auto mem_buf = iter->second;
|
||||
MS_EXCEPTION_IF_NULL(mem_buf);
|
||||
if (mem_buf->status_ != kMemBufIdle) {
|
||||
if (mem_buf->status_ != DynamicMemBufStatus::kMemBufIdle) {
|
||||
DumpDynamicMemPoolDebugInfo();
|
||||
MS_LOG(EXCEPTION) << "Find the mem_buf is not idle, alloc_size[" << size << "] mem_buf_size[" << mem_buf->size_
|
||||
<< "] mem_buf_address[" << mem_buf->device_addr_ << "].";
|
||||
}
|
||||
mem_buf->status_ = kMemBufUsed;
|
||||
mem_buf->status_ = DynamicMemBufStatus::kMemBufUsed;
|
||||
mem_buf->allocator_name_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().name_;
|
||||
mem_buf->allocator_type_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().type_;
|
||||
// Remove map of old idle memory buf
|
||||
(void)mem_mng->idle_mem_buf_map_.erase(iter);
|
||||
// Divide memory buf
|
||||
|
@ -215,8 +224,9 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size, bool from_
|
|||
std::upper_bound(mem_mng->mem_block_list_.begin(), mem_mng->mem_block_list_.end(), device_addr, CmpMemBlock);
|
||||
(void)mem_mng->mem_block_list_.insert(iter, mem_block);
|
||||
// Add new memory buf
|
||||
auto mem_buf = std::make_shared<DynamicMemBuf>(device_addr, kMemBufUsed, real_alloc_size,
|
||||
DynamicMemAllocatorDebugInfo::GetDebugInfo().name_);
|
||||
auto mem_buf = std::make_shared<DynamicMemBuf>(device_addr, DynamicMemBufStatus::kMemBufUsed, real_alloc_size,
|
||||
DynamicMemAllocatorDebugInfo::GetDebugInfo().name_,
|
||||
DynamicMemAllocatorDebugInfo::GetDebugInfo().type_);
|
||||
MS_EXCEPTION_IF_NULL(mem_buf);
|
||||
// Add map of new memory buf in the block
|
||||
(void)mem_block->block_all_mem_buf_map_.emplace(device_addr, mem_buf);
|
||||
|
@ -273,7 +283,7 @@ void DynamicMemPoolBestFit::SplitMemBuf(size_t size, const DynamicMemBufPtr &mem
|
|||
size_t newbuf_size = mem_buf->size_ - size;
|
||||
mem_buf->size_ = size;
|
||||
DeviceMemPtr newbuf_addr = AddressOffset(mem_buf->device_addr_, size);
|
||||
auto new_mem_buf = std::make_shared<DynamicMemBuf>(newbuf_addr, kMemBufIdle, newbuf_size);
|
||||
auto new_mem_buf = std::make_shared<DynamicMemBuf>(newbuf_addr, DynamicMemBufStatus::kMemBufIdle, newbuf_size);
|
||||
// Add map of new memory buf in the block
|
||||
(void)mem_block->block_all_mem_buf_map_.emplace(newbuf_addr, new_mem_buf);
|
||||
// Add map of new idle memory buf
|
||||
|
@ -335,11 +345,11 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c
|
|||
}
|
||||
auto mem_buf = iter->second;
|
||||
MS_EXCEPTION_IF_NULL(mem_buf);
|
||||
if (mem_buf->status_ != kMemBufUsed) {
|
||||
if (mem_buf->status_ != DynamicMemBufStatus::kMemBufUsed) {
|
||||
DumpDynamicMemPoolDebugInfo();
|
||||
MS_LOG(EXCEPTION) << "Find the mem_buf is not used, mem_buf_address[" << mem_buf->device_addr_ << "].";
|
||||
}
|
||||
mem_buf->status_ = kMemBufIdle;
|
||||
mem_buf->status_ = DynamicMemBufStatus::kMemBufIdle;
|
||||
if (mem_mng->mps_.total_used_mem_size_ < mem_buf->size_) {
|
||||
DumpDynamicMemPoolDebugInfo();
|
||||
MS_LOG(EXCEPTION) << "The total used mem size is less than the size of membuf.";
|
||||
|
@ -351,7 +361,7 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c
|
|||
if (next_iter != mem_block->block_all_mem_buf_map_.end()) {
|
||||
auto next_mem_buf = next_iter->second;
|
||||
MS_EXCEPTION_IF_NULL(next_mem_buf);
|
||||
if (next_mem_buf->status_ == kMemBufIdle) {
|
||||
if (next_mem_buf->status_ == DynamicMemBufStatus::kMemBufIdle) {
|
||||
mem_buf->size_ += next_mem_buf->size_;
|
||||
EraseIdleMemBuf(next_mem_buf->size_, next_mem_buf->device_addr_, mem_mng);
|
||||
(void)mem_block->block_all_mem_buf_map_.erase(next_iter);
|
||||
|
@ -365,7 +375,7 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c
|
|||
(void)prev_iter--;
|
||||
prev_mem_buf = prev_iter->second;
|
||||
MS_EXCEPTION_IF_NULL(prev_mem_buf);
|
||||
if (prev_mem_buf->status_ == kMemBufIdle) {
|
||||
if (prev_mem_buf->status_ == DynamicMemBufStatus::kMemBufIdle) {
|
||||
EraseIdleMemBuf(prev_mem_buf->size_, prev_mem_buf->device_addr_, mem_mng);
|
||||
prev_mem_buf->size_ += mem_buf->size_;
|
||||
(void)mem_block->block_all_mem_buf_map_.erase(iter);
|
||||
|
@ -398,6 +408,8 @@ void DynamicMemPoolBestFit::EraseIdleMemBuf(size_t size, const DeviceMemPtr &dev
|
|||
|
||||
void DynamicMemPoolBestFit::ReleaseDeviceRes() {
|
||||
std::lock_guard<std::mutex> locker(mutex_);
|
||||
DumpDynamicMemPoolStateInfo();
|
||||
|
||||
auto fn = [this](const MemStatusManagerPtr &mem_mng) {
|
||||
for (auto &iter : mem_mng->mem_block_list_) {
|
||||
auto &device_addr = iter->device_addr_base_;
|
||||
|
@ -416,32 +428,50 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() {
|
|||
}
|
||||
|
||||
void DynamicMemPoolBestFit::DumpDynamicMemPoolStateInfo() {
|
||||
auto fn = [](const MemStatusManagerPtr &mem_mng, const std::string &mem_type) {
|
||||
size_t total_used_size_list[ALLOCATOR_TYPE_NUM] = {0};
|
||||
auto fn = [&](const MemStatusManagerPtr &mem_mng, const std::string &mem_type) {
|
||||
if (mem_mng->mem_block_list_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::ostringstream buf;
|
||||
for (size_t i = 0; i < mem_mng->mem_block_list_.size(); ++i) {
|
||||
size_t idle_size = 0;
|
||||
size_t mem_block_used_size = 0;
|
||||
for (auto mb = mem_mng->mem_block_list_[i]->block_all_mem_buf_map_.begin();
|
||||
mb != mem_mng->mem_block_list_[i]->block_all_mem_buf_map_.end(); ++mb) {
|
||||
if (mb->second->status_ == kMemBufIdle) {
|
||||
idle_size += mb->second->size_;
|
||||
if (mb->second->status_ == DynamicMemBufStatus::kMemBufUsed) {
|
||||
mem_block_used_size += mb->second->size_;
|
||||
MS_EXCEPTION_IF_CHECK_FAIL((static_cast<int>(mb->second->allocator_type_) < ALLOCATOR_TYPE_NUM),
|
||||
"Allocator type is out of range.");
|
||||
total_used_size_list[static_cast<int>(mb->second->allocator_type_)] += mb->second->size_;
|
||||
}
|
||||
}
|
||||
buf << ", block[" << i << "] block size:" << mem_mng->mem_block_list_[i]->mem_block_size_
|
||||
<< " idle size:" << idle_size;
|
||||
buf << ", block[" << i << "] block size:" << mem_mng->mem_block_list_[i]->mem_block_size_ / kMBToByte
|
||||
<< "M idle size:" << (mem_mng->mem_block_list_[i]->mem_block_size_ - mem_block_used_size) / kMBToByte << "M";
|
||||
}
|
||||
|
||||
// Dump all the memory buf info
|
||||
MS_LOG(WARNING) << mem_type << " pool info: block unit size " << mem_mng->unit_size_ << ", block counts "
|
||||
<< mem_mng->mem_block_list_.size() << buf.str() << ". Total allocated mem "
|
||||
<< mem_mng->mps_.total_mem_size_ << ", peak used mem " << mem_mng->mps_.used_mem_peak_size_
|
||||
<< ", in used mem " << mem_mng->mps_.total_used_mem_size_ << ", total idle mem "
|
||||
<< mem_mng->mps_.total_mem_size_ - mem_mng->mps_.total_used_mem_size_;
|
||||
MS_LOG(INFO) << mem_type << " pool info: Total allocated mem:" << mem_mng->mps_.total_mem_size_ / kMBToByte
|
||||
<< "M, peak used mem:" << mem_mng->mps_.used_mem_peak_size_ / kMBToByte
|
||||
<< "M, in used mem:" << mem_mng->mps_.total_used_mem_size_ / kMBToByte << "M, total idle mem:"
|
||||
<< (mem_mng->mps_.total_mem_size_ - mem_mng->mps_.total_used_mem_size_) / kMBToByte
|
||||
<< "M. Block unit size:" << mem_mng->unit_size_ / kMBToByte
|
||||
<< "M, block counts:" << mem_mng->mem_block_list_.size() << buf.str();
|
||||
};
|
||||
|
||||
fn(common_mem_, std::string(kCommonMem));
|
||||
fn(persistent_mem_, std::string(kPersistentParamMem));
|
||||
MS_LOG(INFO) << "The dynamic memory pool total allocated mem:" << TotalMemStatistics() / kMBToByte
|
||||
<< "M, peak used mem:" << UsedMemPeakStatistics() / kMBToByte
|
||||
<< "M, in used mem:" << TotalUsedMemStatistics() / kMBToByte
|
||||
<< "M, total idle mem:" << (TotalMemStatistics() - TotalUsedMemStatistics()) / kMBToByte
|
||||
<< "M. Weight used size:" << total_used_size_list[static_cast<int>(AllocatorType::kWeight)] / kMBToByte
|
||||
<< "M, constant value used size:"
|
||||
<< total_used_size_list[static_cast<int>(AllocatorType::kConstantValue)] / kMBToByte
|
||||
<< "M, kernel output used size:"
|
||||
<< total_used_size_list[static_cast<int>(AllocatorType::kKernelOutput)] / kMBToByte
|
||||
<< "M, other used size:" << total_used_size_list[static_cast<int>(AllocatorType::kOther)] / kMBToByte
|
||||
<< "M.";
|
||||
}
|
||||
|
||||
void DynamicMemPoolBestFit::DumpDynamicMemPoolDebugInfo() {
|
||||
|
@ -461,13 +491,14 @@ void DynamicMemPoolBestFit::DumpDynamicMemPoolDebugInfo() {
|
|||
for (auto iter_mem_buf = mem_buf_map.begin(); iter_mem_buf != mem_buf_map.end(); ++iter_mem_buf) {
|
||||
auto mem_buf = iter_mem_buf->second;
|
||||
MS_EXCEPTION_IF_NULL(mem_buf);
|
||||
if (mem_buf->status_ == kMemBufIdle) {
|
||||
if (mem_buf->status_ == DynamicMemBufStatus::kMemBufIdle) {
|
||||
total_idle_mem1 += mem_buf->size_;
|
||||
} else {
|
||||
total_used_mem += mem_buf->size_;
|
||||
}
|
||||
MS_LOG(INFO) << " MemBuf info: address[" << mem_buf->device_addr_ << "] size[" << mem_buf->size_ << "] status["
|
||||
<< kBufStatusString.at(mem_buf->status_) << "] name[" << mem_buf->allocator_name_ << "].";
|
||||
<< kBufStatusString.at(mem_buf->status_) << "] name[" << mem_buf->allocator_name_ << "] type["
|
||||
<< kAllocatorTypeString.at(mem_buf->allocator_type_) << "].";
|
||||
}
|
||||
}
|
||||
// Dump all the idle memory buf info.
|
||||
|
@ -491,10 +522,10 @@ void DynamicMemPoolBestFit::DumpDynamicMemPoolDebugInfo() {
|
|||
}
|
||||
};
|
||||
|
||||
MS_LOG(INFO) << "Start dump dynamic memory pool debug info.";
|
||||
MS_LOG(WARNING) << "Start dump dynamic memory pool debug info.";
|
||||
fn(common_mem_, std::string(kCommonMem));
|
||||
fn(persistent_mem_, std::string(kPersistentParamMem));
|
||||
MS_LOG(INFO) << "Finish dump dynamic memory pool debug info.";
|
||||
MS_LOG(WARNING) << "Finish dump dynamic memory pool debug info.";
|
||||
}
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -32,7 +32,11 @@ namespace device {
|
|||
using DeviceMemPtr = void(*);
|
||||
|
||||
// The status of memory buf: an idle buf can be handed out by the pool, a used buf is currently allocated.
|
||||
enum DynamicMemBufStatus : int { kMemBufIdle, kMemBufUsed };
|
||||
enum class DynamicMemBufStatus : int { kMemBufIdle, kMemBufUsed };
|
||||
|
||||
// Memory allocator type is used to record the memory classification statistics information.
// kOther must stay the last enumerator: ALLOCATOR_TYPE_NUM is derived from it.
enum class AllocatorType : int { kWeight, kConstantValue, kKernelOutput, kOther };
// Number of allocator types; sizes and bounds-checks the per-type statistics array.
static const int ALLOCATOR_TYPE_NUM = static_cast<int>(AllocatorType::kOther) + 1;
|
||||
|
||||
// Alloc memory aligned according to 512 bytes.
|
||||
static const size_t DYNAMIC_MEM_ALIGN_SIZE = 512;
|
||||
|
@ -48,6 +52,7 @@ struct DeviceAddrCmp {
|
|||
// Recording information for debugging the memory allocator.
// The pool copies name_ and type_ into the mem buf it allocates.
|
||||
struct AllocatorDebugInfo {
|
||||
// Name of the allocation requester.
std::string name_{"Unknown"};
|
||||
// Classification of the allocation, used for the per-type used-size statistics.
AllocatorType type_{AllocatorType::kOther};
|
||||
// NOTE(review): presumably the kernel input index the allocation is made for, -1 when not set - confirm.
int input_index_{-1};
|
||||
// NOTE(review): presumably the kernel output index the allocation is made for, -1 when not set - confirm.
int output_index_{-1};
|
||||
};
|
||||
|
@ -58,8 +63,9 @@ class DynamicMemAllocatorDebugInfo {
|
|||
static AllocatorDebugInfo &GetDebugInfo() noexcept { return debug_info_; }
|
||||
|
||||
// Set the debug info when memory alloc.
// Overwrites every field of debug_info_ (a thread_local object) with the given values; callers invoke it
// right before requesting memory so that the pool can tag the allocated mem buf with the name and type.
|
||||
static void SetDebugInfo(const std::string &name, int input_index = -1, int output_index = -1) {
|
||||
static void SetDebugInfo(const std::string &name, AllocatorType type, int input_index = -1, int output_index = -1) {
|
||||
debug_info_.name_ = name;
|
||||
debug_info_.type_ = type;
|
||||
debug_info_.input_index_ = input_index;
|
||||
debug_info_.output_index_ = output_index;
|
||||
}
|
||||
|
@ -75,12 +81,19 @@ class DynamicMemAllocatorDebugInfo {
|
|||
// Memory buf is the smallest operation object of dynamic memory pool.
|
||||
struct DynamicMemBuf {
|
||||
// The allocator name and type are optional and default to "Unknown" / AllocatorType::kOther.
DynamicMemBuf(DeviceMemPtr addr, DynamicMemBufStatus status, size_t size,
|
||||
const std::string &allocator_name = "Unknown")
|
||||
: device_addr_(addr), status_(status), size_(size), allocator_name_(allocator_name) {}
|
||||
const std::string &allocator_name = "Unknown", AllocatorType allocator_type = AllocatorType::kOther)
|
||||
: device_addr_(addr),
|
||||
status_(status),
|
||||
size_(size),
|
||||
allocator_name_(allocator_name),
|
||||
allocator_type_{allocator_type} {}
|
||||
// Start address of the buf.
DeviceMemPtr device_addr_;
|
||||
// Whether the buf is idle or used.
DynamicMemBufStatus status_;
|
||||
// Size of the buf.
size_t size_;
|
||||
|
||||
// Debug info.
|
||||
// Name taken from DynamicMemAllocatorDebugInfo when the buf is allocated.
std::string allocator_name_;
|
||||
// Classification taken from DynamicMemAllocatorDebugInfo, used for the per-type used-size statistics.
AllocatorType allocator_type_;
|
||||
};
|
||||
using DynamicMemBufPtr = std::shared_ptr<DynamicMemBuf>;
|
||||
// Multimap key is the tensor size, for finding the idle memory buf by tensor size.
|
||||
|
|
|
@ -390,7 +390,7 @@ void ControlActor::UpdateOutputData(OpData<DeviceTensor> *const output_data, con
|
|||
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
|
||||
{device_tensor->device_name(), device_tensor->device_id()});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), 0);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), device::AllocatorType::kOther, 0);
|
||||
if ((device_tensor->GetPtr() == nullptr) &&
|
||||
(!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize()))) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_context,
|
||||
|
|
|
@ -198,7 +198,7 @@ void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {
|
|||
|
||||
// If the address ptr can't be changed, then alloc the new device memory and copy the data.
|
||||
if (input_device_tensor->is_ptr_persisted()) {
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name());
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), device::AllocatorType::kOther);
|
||||
if (!device_contexts_[i]->AllocateMemory(new_device_tensor.get(), new_device_tensor->GetSize())) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_contexts_[i],
|
||||
GetAID().Name(), new_device_tensor->GetSize());
|
||||
|
|
|
@ -40,7 +40,8 @@ void SyncTensorData(const TensorPtr &host_tensor, const DeviceTensorPtr &device_
|
|||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), 0);
|
||||
auto allocator_type = node->isa<ValueNode>() ? device::AllocatorType::kConstantValue : device::AllocatorType::kWeight;
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), allocator_type, 0);
|
||||
if ((device_tensor->GetPtr() == nullptr) &&
|
||||
(!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize()))) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(strategy, *context, *device_context, node->fullname_with_scope(),
|
||||
|
@ -131,7 +132,8 @@ void PrepareDataForValue(const ValuePtr &value, const KernelWithIndex &node_with
|
|||
return;
|
||||
}
|
||||
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), 0);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), device::AllocatorType::kConstantValue,
|
||||
0);
|
||||
if (!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize())) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(GraphExecutionStrategy::kPipeline, *context, *device_context,
|
||||
node->fullname_with_scope(), device_tensor->GetSize());
|
||||
|
@ -570,7 +572,7 @@ void DataPrepareActor::PrepareDataForControlValueNode(const KernelWithIndex &nod
|
|||
tensor->set_device_address(device_tensor);
|
||||
UpdateRefCount(device_tensor.get(), true);
|
||||
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->DebugString(), 0);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->DebugString(), device::AllocatorType::kConstantValue, 0);
|
||||
if (!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize())) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(real_strategy_, *context, *device_context,
|
||||
node->fullname_with_scope(), device_tensor->GetSize());
|
||||
|
@ -612,7 +614,8 @@ void DataPrepareActor::PrepareDataForValueNode(const ValueNodePtr &node, const A
|
|||
}
|
||||
MS_LOG(INFO) << "Prepare device data for value node: " << node->fullname_with_scope();
|
||||
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(), 0);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(node->fullname_with_scope(),
|
||||
device::AllocatorType::kConstantValue, 0);
|
||||
if (!device_context->AllocateMemory(device_tensor.get(), device_tensor->GetSize())) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(real_strategy_, *context, *device_context,
|
||||
node->fullname_with_scope(), device_tensor->GetSize());
|
||||
|
@ -647,7 +650,8 @@ void DataPrepareActor::CopyDataFromDeviceTensorStore(const AnfNodePtr &front_nod
|
|||
const auto &another_device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
|
||||
{device::kDeviceTypeToName.at(another_device_type), device_context->device_context_key().device_id_});
|
||||
MS_EXCEPTION_IF_NULL(another_device_context);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(backend_node->fullname_with_scope(), 0);
|
||||
auto type = backend_node->isa<ValueNode>() ? device::AllocatorType::kConstantValue : device::AllocatorType::kWeight;
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(backend_node->fullname_with_scope(), type, 0);
|
||||
if ((another_device_tensor->GetPtr() == nullptr) &&
|
||||
(!another_device_context->AllocateMemory(another_device_tensor.get(), another_device_tensor->GetSize()))) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(real_strategy_, *context, *another_device_context,
|
||||
|
|
|
@ -331,7 +331,8 @@ void KernelActor::CopyInputDeviceTensor(const OpData<DeviceTensor> *input_data,
|
|||
// Update the input device tensor.
|
||||
input_device_tensors_[input_data->index_] = new_device_tensor.get();
|
||||
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), input_data->index_);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), device::AllocatorType::kKernelOutput,
|
||||
input_data->index_);
|
||||
if ((new_device_tensor->GetPtr() == nullptr) &&
|
||||
(!device_contexts_[0]->AllocateMemory(new_device_tensor.get(), new_device_tensor->GetSize()))) {
|
||||
SET_OPCONTEXT_MEMORY_ALLOC_FAIL_BY_STRATEGY(strategy_, *context, *(device_contexts_[0]), GetAID().Name(),
|
||||
|
|
|
@ -36,7 +36,7 @@ void MemoryManagerActor::AllocateMemory(const std::vector<DeviceTensor *> *alloc
|
|||
}
|
||||
try {
|
||||
// Allocate memory through the device context.
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name());
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name(), device::AllocatorType::kKernelOutput);
|
||||
if (!device_context->AllocateMemory(device_tensor, device_tensor->GetSize())) {
|
||||
SetOpContextMemoryAllocFail(from_aid.Name(), device_context, device_tensor->GetSize(), op_context);
|
||||
return;
|
||||
|
@ -75,7 +75,7 @@ void MemoryManagerActor::AllocateContinuousMemory(const std::vector<std::vector<
|
|||
auto &device_context = (*device_contexts)[i];
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
// Allocate memory through the device context.
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name());
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name(), device::AllocatorType::kKernelOutput);
|
||||
if (!device_context->AllocateContinuousMemory(alloc_list, total_size, size_list)) {
|
||||
SetOpContextMemoryAllocFail(from_aid.Name(), device_context, total_size, op_context);
|
||||
return;
|
||||
|
@ -108,7 +108,7 @@ void MemoryManagerActor::AllocateBatchMemory(const std::vector<DeviceTensor *> *
|
|||
|
||||
try {
|
||||
// Allocate memory through the device context.
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name());
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(from_aid.Name(), device::AllocatorType::kKernelOutput);
|
||||
if (!device_context->AllocateMemory(device_tensor, device_tensor->GetSize())) {
|
||||
SetOpContextMemoryAllocFail(from_aid.Name(), device_context, device_tensor->GetSize(), op_context);
|
||||
return;
|
||||
|
|
|
@ -262,7 +262,7 @@ void OutputActor::UpdateOutputDeviceAddress() {
|
|||
if (IsOutputAddressPersisted(device_tensor, output_node)) {
|
||||
auto device_context = device_contexts_[i];
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name());
|
||||
device::DynamicMemAllocatorDebugInfo::SetDebugInfo(GetAID().Name(), device::AllocatorType::kOther);
|
||||
if (!device_context->AllocateMemory(tensor_device_address.get(), tensor_device_address->GetSize())) {
|
||||
MS_LOG(EXCEPTION) << "Device(id:" << device_context->device_context_key().device_id_
|
||||
<< ") memory isn't enough and alloc failed, kernel name: "
|
||||
|
|
Loading…
Reference in New Issue