!26320 Add Exception log when AscendMemoryPool::AllocTensorMem fails

Merge pull request !26320 from tanghuikang/oom_nullptr_log
This commit is contained in:
i-robot 2021-11-16 12:46:48 +00:00 committed by Gitee
commit 8bf903ba19
5 changed files with 29 additions and 7 deletions

View File

@ -23,6 +23,7 @@
"mindspore/mindspore/core/ops/avg_pool_3d.cc" "zerodivcond"
"mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm"
"mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro"
"mindspore/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" "nullPointerArithmeticRedundantCheck"
# MindData
"mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"

View File

@ -130,17 +130,23 @@ bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
// Requests `size` from AscendMemoryPool::AllocTensorMem and returns the address it hands back.
// Raises via MS_LOG(EXCEPTION), reporting the requested size, when the pool returns nullptr.
void *AscendPsCache::MallocMemory(size_t size) {
  const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
  // Checked by hand rather than with MS_EXCEPTION_IF_NULL so the failure message can carry the
  // requested size.
  if (device_addr == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
  }
  return device_addr;
}
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
MS_ERROR_IF_NULL(offset_addr_);
if (offset_addr_ == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
}
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
cache_vocab_size_addr_ =
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
MS_ERROR_IF_NULL(cache_vocab_size_addr_);
if (cache_vocab_size_addr_ == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
}
int copy_value = SizeToInt(cache_vocab_size);
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
return false;

View File

@ -27,7 +27,9 @@ size_t AscendLaunchKernel::AlignSizeForLaunchKernel(size_t size) { return Memory
// Requests `size` from AscendMemoryPool::AllocTensorMem and returns the result as a uint8_t pointer.
// Raises via MS_LOG(EXCEPTION), reporting the requested size, when the pool returns nullptr.
uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
  auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size);
  // Checked by hand rather than with MS_EXCEPTION_IF_NULL so the failure message can carry the
  // requested size.
  if (device_memory == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
  }
  return static_cast<uint8_t *>(device_memory);
}

View File

@ -49,7 +49,10 @@ void *AscendMemoryManager::MallocDevice(size_t size) {
// Rounds `size` with GetCommonAlignSize, requests that amount from AscendMemoryPool::AllocTensorMem
// and returns the address it hands back.
// Raises via MS_LOG(EXCEPTION) when the pool returns nullptr; the message reports the aligned size
// together with AscendMemAdapter's DevMemStatistics() output.
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
  auto align_size = GetCommonAlignSize(size);
  const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
  // Checked by hand rather than with MS_EXCEPTION_IF_NULL so the failure message can carry the
  // aligned size and the device memory statistics.
  if (device_addr == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
                      << ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
  }
  return device_addr;
}
@ -79,7 +82,10 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
#endif
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
MS_EXCEPTION_IF_NULL(alloc_address);
if (alloc_address == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
}
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
}
@ -114,7 +120,10 @@ void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &grap
// Rounds `size` with GetCommunicationAlignSize, requests that amount from
// AscendMemoryPool::AllocTensorMem and returns the allocation advanced by kMemAlignSize bytes.
// Raises via MS_LOG(EXCEPTION) when the pool returns nullptr; the message reports the aligned size
// together with AscendMemAdapter's DevMemStatistics() output.
uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) {
  auto align_size = GetCommunicationAlignSize(size);
  uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
  // Checked by hand rather than with MS_EXCEPTION_IF_NULL so the failure message can carry the
  // aligned size and the device memory statistics. The check must stay ahead of the pointer
  // arithmetic below.
  if (base_ptr == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
                      << ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
  }
  // NOTE(review): the kMemAlignSize offset presumably skips a leading protect area for
  // communication memory -- confirm against GetCommunicationAlignSize.
  return base_ptr + kMemAlignSize;
}

View File

@ -97,6 +97,8 @@ fi
CHECK_RESULT_FILE=__code_format_check_result__
echo "0" > "$CHECK_RESULT_FILE"
set +e
# check format of files modified in the latest commit
while read line; do
if [ ! -e ${line} ]; then
@ -116,6 +118,8 @@ while read line; do
fi
done < "${CHECK_LIST_FILE}"
set -e
result=$(cat "${CHECK_RESULT_FILE}")
rm "${CHECK_RESULT_FILE}"
rm "${CHECK_LIST_FILE}"