forked from mindspore-Ecosystem/mindspore
!26675 Optimize oom log
Merge pull request !26675 from tanghuikang/oom_nullptr_log
This commit is contained in:
commit
df25ee8c68
|
@ -131,7 +131,7 @@ bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
|
|||
void *AscendPsCache::MallocMemory(size_t size) {
|
||||
const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||
if (device_addr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size;
|
||||
}
|
||||
return device_addr;
|
||||
}
|
||||
|
@ -139,13 +139,13 @@ void *AscendPsCache::MallocMemory(size_t size) {
|
|||
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
|
||||
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
||||
if (offset_addr_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << sizeof(int);
|
||||
}
|
||||
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
|
||||
cache_vocab_size_addr_ =
|
||||
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
||||
if (cache_vocab_size_addr_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << sizeof(int);
|
||||
}
|
||||
int copy_value = SizeToInt(cache_vocab_size);
|
||||
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
|
||||
|
|
|
@ -28,7 +28,7 @@ size_t AscendLaunchKernel::AlignSizeForLaunchKernel(size_t size) { return Memory
|
|||
uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
|
||||
auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||
if (device_memory == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size;
|
||||
}
|
||||
return static_cast<uint8_t *>(device_memory);
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
|
|||
auto align_size = GetCommonAlignSize(size);
|
||||
const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
||||
if (device_addr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << align_size
|
||||
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
}
|
||||
return device_addr;
|
||||
|
@ -83,7 +83,7 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
|
|||
|
||||
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||
if (alloc_address == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << align_size
|
||||
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
}
|
||||
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
|
||||
|
@ -121,7 +121,7 @@ uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) {
|
|||
auto align_size = GetCommunicationAlignSize(size);
|
||||
uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||
if (base_ptr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << align_size
|
||||
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
}
|
||||
return base_ptr + kMemAlignSize;
|
||||
|
|
|
@ -32,7 +32,9 @@ size_t AscendMemoryPool::CalMemBlockAllocSize(size_t size) {
|
|||
auto device_free_mem_size = free_mem_size();
|
||||
if (device_free_mem_size < size) {
|
||||
MS_LOG(WARNING) << "Out of Memory. Request memory size: " << size
|
||||
<< ", Memory Statistic:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
<< ", Memory Statistic:" << AscendMemAdapter::GetInstance().DevMemStatistics()
|
||||
<< "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
|
||||
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
|
||||
return 0;
|
||||
}
|
||||
auto alloc_mem_size = ASCEND_DYNAMIC_MEM_ALLOC_UNIT_SIZE;
|
||||
|
|
Loading…
Reference in New Issue