Add exception log when AscendMemoryPool::AllocTensorMem fails

This commit is contained in:
tanghuikang 2021-11-15 20:23:23 +08:00
parent 9dfe08b805
commit f0995e7899
5 changed files with 29 additions and 7 deletions

View File

@ -23,6 +23,7 @@
"mindspore/mindspore/core/ops/avg_pool_3d.cc" "zerodivcond" "mindspore/mindspore/core/ops/avg_pool_3d.cc" "zerodivcond"
"mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm" "mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm"
"mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro" "mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro"
"mindspore/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" "nullPointerArithmeticRedundantCheck"
# MindData # MindData
"mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm" "mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"

View File

@ -130,17 +130,23 @@ bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
void *AscendPsCache::MallocMemory(size_t size) { void *AscendPsCache::MallocMemory(size_t size) {
const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size); const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
MS_EXCEPTION_IF_NULL(device_addr); if (device_addr == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
}
return device_addr; return device_addr;
} }
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) { bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int))); offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
MS_ERROR_IF_NULL(offset_addr_); if (offset_addr_ == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
}
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int)); rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
cache_vocab_size_addr_ = cache_vocab_size_addr_ =
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int))); reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
MS_ERROR_IF_NULL(cache_vocab_size_addr_); if (cache_vocab_size_addr_ == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
}
int copy_value = SizeToInt(cache_vocab_size); int copy_value = SizeToInt(cache_vocab_size);
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) { if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
return false; return false;

View File

@ -27,7 +27,9 @@ size_t AscendLaunchKernel::AlignSizeForLaunchKernel(size_t size) { return Memory
uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) { uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size); auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size);
MS_EXCEPTION_IF_NULL(device_memory); if (device_memory == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
}
return static_cast<uint8_t *>(device_memory); return static_cast<uint8_t *>(device_memory);
} }

View File

@ -49,7 +49,10 @@ void *AscendMemoryManager::MallocDevice(size_t size) {
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
auto align_size = GetCommonAlignSize(size); auto align_size = GetCommonAlignSize(size);
const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size); const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
MS_EXCEPTION_IF_NULL(device_addr); if (device_addr == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
}
return device_addr; return device_addr;
} }
@ -79,7 +82,10 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
#endif #endif
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
MS_EXCEPTION_IF_NULL(alloc_address); if (alloc_address == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
}
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory // create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
return communication_mem ? alloc_address + kMemAlignSize : alloc_address; return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
} }
@ -114,7 +120,10 @@ void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &grap
uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) { uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) {
auto align_size = GetCommunicationAlignSize(size); auto align_size = GetCommunicationAlignSize(size);
uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
MS_EXCEPTION_IF_NULL(base_ptr); if (base_ptr == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
}
return base_ptr + kMemAlignSize; return base_ptr + kMemAlignSize;
} }

View File

@ -97,6 +97,8 @@ fi
CHECK_RESULT_FILE=__code_format_check_result__ CHECK_RESULT_FILE=__code_format_check_result__
echo "0" > "$CHECK_RESULT_FILE" echo "0" > "$CHECK_RESULT_FILE"
set +e
# check format of files modified in the latest commit # check format of files modified in the latest commit
while read line; do while read line; do
if [ ! -e ${line} ]; then if [ ! -e ${line} ]; then
@ -116,6 +118,8 @@ while read line; do
fi fi
done < "${CHECK_LIST_FILE}" done < "${CHECK_LIST_FILE}"
set -e
result=$(cat "${CHECK_RESULT_FILE}") result=$(cat "${CHECK_RESULT_FILE}")
rm "${CHECK_RESULT_FILE}" rm "${CHECK_RESULT_FILE}"
rm "${CHECK_LIST_FILE}" rm "${CHECK_LIST_FILE}"