forked from mindspore-Ecosystem/mindspore
Add Exception log when AscendMemoryPool::AllocTensorMem fails
This commit is contained in:
parent
9dfe08b805
commit
f0995e7899
|
@ -23,6 +23,7 @@
|
||||||
"mindspore/mindspore/core/ops/avg_pool_3d.cc" "zerodivcond"
|
"mindspore/mindspore/core/ops/avg_pool_3d.cc" "zerodivcond"
|
||||||
"mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm"
|
"mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm"
|
||||||
"mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro"
|
"mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro"
|
||||||
|
"mindspore/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" "nullPointerArithmeticRedundantCheck"
|
||||||
|
|
||||||
# MindData
|
# MindData
|
||||||
"mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"
|
"mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"
|
||||||
|
|
|
@ -130,17 +130,23 @@ bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
|
||||||
|
|
||||||
void *AscendPsCache::MallocMemory(size_t size) {
|
void *AscendPsCache::MallocMemory(size_t size) {
|
||||||
const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||||
MS_EXCEPTION_IF_NULL(device_addr);
|
if (device_addr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
|
||||||
|
}
|
||||||
return device_addr;
|
return device_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
|
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
|
||||||
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
||||||
MS_ERROR_IF_NULL(offset_addr_);
|
if (offset_addr_ == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
|
||||||
|
}
|
||||||
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
|
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
|
||||||
cache_vocab_size_addr_ =
|
cache_vocab_size_addr_ =
|
||||||
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
|
||||||
MS_ERROR_IF_NULL(cache_vocab_size_addr_);
|
if (cache_vocab_size_addr_ == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << sizeof(int);
|
||||||
|
}
|
||||||
int copy_value = SizeToInt(cache_vocab_size);
|
int copy_value = SizeToInt(cache_vocab_size);
|
||||||
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
|
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -27,7 +27,9 @@ size_t AscendLaunchKernel::AlignSizeForLaunchKernel(size_t size) { return Memory
|
||||||
|
|
||||||
uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
|
uint8_t *AscendLaunchKernel::AllocDeviceMem(size_t size) {
|
||||||
auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
||||||
MS_EXCEPTION_IF_NULL(device_memory);
|
if (device_memory == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << size;
|
||||||
|
}
|
||||||
return static_cast<uint8_t *>(device_memory);
|
return static_cast<uint8_t *>(device_memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,10 @@ void *AscendMemoryManager::MallocDevice(size_t size) {
|
||||||
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
|
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
|
||||||
auto align_size = GetCommonAlignSize(size);
|
auto align_size = GetCommonAlignSize(size);
|
||||||
const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
const auto device_addr = AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
||||||
MS_EXCEPTION_IF_NULL(device_addr);
|
if (device_addr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||||
|
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||||
|
}
|
||||||
return device_addr;
|
return device_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,7 +82,10 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||||
MS_EXCEPTION_IF_NULL(alloc_address);
|
if (alloc_address == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||||
|
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||||
|
}
|
||||||
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
|
// create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory
|
||||||
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
|
return communication_mem ? alloc_address + kMemAlignSize : alloc_address;
|
||||||
}
|
}
|
||||||
|
@ -114,7 +120,10 @@ void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &grap
|
||||||
uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) {
|
uint8_t *AscendMemoryManager::MallocCommunicationMemFromMemPool(size_t size) {
|
||||||
auto align_size = GetCommunicationAlignSize(size);
|
auto align_size = GetCommunicationAlignSize(size);
|
||||||
uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
uint8_t *base_ptr = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
||||||
MS_EXCEPTION_IF_NULL(base_ptr);
|
if (base_ptr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Fail to alloc tensor memory, size: " << align_size
|
||||||
|
<< ", memory statistics:" << AscendMemAdapter::GetInstance().DevMemStatistics();
|
||||||
|
}
|
||||||
return base_ptr + kMemAlignSize;
|
return base_ptr + kMemAlignSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -97,6 +97,8 @@ fi
|
||||||
CHECK_RESULT_FILE=__code_format_check_result__
|
CHECK_RESULT_FILE=__code_format_check_result__
|
||||||
echo "0" > "$CHECK_RESULT_FILE"
|
echo "0" > "$CHECK_RESULT_FILE"
|
||||||
|
|
||||||
|
set +e
|
||||||
|
|
||||||
# check format of files modified in the latest commit
|
# check format of files modified in the latest commit
|
||||||
while read line; do
|
while read line; do
|
||||||
if [ ! -e ${line} ]; then
|
if [ ! -e ${line} ]; then
|
||||||
|
@ -116,6 +118,8 @@ while read line; do
|
||||||
fi
|
fi
|
||||||
done < "${CHECK_LIST_FILE}"
|
done < "${CHECK_LIST_FILE}"
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
result=$(cat "${CHECK_RESULT_FILE}")
|
result=$(cat "${CHECK_RESULT_FILE}")
|
||||||
rm "${CHECK_RESULT_FILE}"
|
rm "${CHECK_RESULT_FILE}"
|
||||||
rm "${CHECK_LIST_FILE}"
|
rm "${CHECK_LIST_FILE}"
|
||||||
|
|
Loading…
Reference in New Issue