forked from mindspore-Ecosystem/mindspore
!26320 Add Exception log when AscendMemoryPool::AllocTensorMem fail
Merge pull request !26320 from tanghuikang/oom_nullptr_log
This commit is contained in:
commit
8bf903ba19
|
@ -23,6 +23,7 @@
|
|||
"mindspore/mindspore/core/ops/avg_pool_3d.cc" "zerodivcond"
|
||||
"mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm"
|
||||
"mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro"
|
||||
"mindspore/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" "nullPointerArithmeticRedundantCheck"
|
||||
|
||||
# MindData
|
||||
"mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"
|
||||
|
|
|
@ -130,17 +130,23 @@ bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
|
|||
|
||||
void *AscendPsCache::MallocMemory(size_t size) {
|
||||
const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||
MS_EXCEPTION_IF_NULL(device_addr);
|
||||
if (device_addr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
|
||||
}
|
||||
return device_addr;
|
||||
}
|
||||
|
||||
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
|
||||
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
||||
MS_ERROR_IF_NULL(offset_addr_);
|
||||
if (offset_addr_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
|
||||
}
|
||||
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
|
||||
cache_vocab_size_addr_ =
|
||||
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
||||
MS_ERROR_IF_NULL(cache_vocab_size_addr_);
|
||||
if (cache_vocab_size_addr_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
|
||||
}
|
||||
int copy_value = SizeToInt(cache_vocab_size);
|
||||
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
|
||||
return false;
|
||||
|
|
|
@ -27,7 +27,9 @@ size_t AscendLaunchKernel::AlignSizeForLaunchKernel(size_t size) { return Memory
|
|||
|
||||
uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
|
||||
auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||
MS_EXCEPTION_IF_NULL(device_memory);
|
||||
if (device_memory == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
|
||||
}
|
||||
return static_cast<uint8_t *>(device_memory);
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,10 @@ void *AscendMemoryManager::MallocDevice(size_t size) {
|
|||
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
|
||||
auto align_size = GetCommonAlignSize(size);
|
||||
const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
||||
MS_EXCEPTION_IF_NULL(device_addr);
|
||||
if (device_addr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
}
|
||||
return device_addr;
|
||||
}
|
||||
|
||||
|
@ -79,7 +82,10 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
|
|||
#endif
|
||||
|
||||
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||
MS_EXCEPTION_IF_NULL(alloc_address);
|
||||
if (alloc_address == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
}
|
||||
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
|
||||
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
|
||||
}
|
||||
|
@ -114,7 +120,10 @@ void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &grap
|
|||
uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) {
|
||||
auto align_size = GetCommunicationAlignSize(size);
|
||||
uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||
MS_EXCEPTION_IF_NULL(base_ptr);
|
||||
if (base_ptr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||
}
|
||||
return base_ptr + kMemAlignSize;
|
||||
}
|
||||
|
||||
|
|
|
@ -97,6 +97,8 @@ fi
|
|||
CHECK_RESULT_FILE=__code_format_check_result__
|
||||
echo "0" > "$CHECK_RESULT_FILE"
|
||||
|
||||
set +e
|
||||
|
||||
# check format of files modified in the latest commit
|
||||
while read line; do
|
||||
if [ ! -e ${line} ]; then
|
||||
|
@ -116,6 +118,8 @@ while read line; do
|
|||
fi
|
||||
done < "${CHECK_LIST_FILE}"
|
||||
|
||||
set -e
|
||||
|
||||
result=$(cat "${CHECK_RESULT_FILE}")
|
||||
rm "${CHECK_RESULT_FILE}"
|
||||
rm "${CHECK_LIST_FILE}"
|
||||
|
|
Loading…
Reference in New Issue