Improve log messages and memory-statistics reporting

This commit is contained in:
LaiYongqiang 2021-10-27 12:36:47 +08:00
parent 1639a33c07
commit dc7988f4bd
7 changed files with 61 additions and 44 deletions

View File

@ -32,6 +32,7 @@
#include "runtime/device/ascend/kernel_build_ascend.h"
#include "runtime/device/ascend/ascend_kernel_runtime.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "runtime/device/ascend/ascend_memory_adapter.h"
#include "backend/optimizer/ascend/ascend_backend_optimization.h"
#include "backend/optimizer/common/common_backend_optimization.h"
#include "runtime/device/kernel_adjust.h"
@ -586,7 +587,7 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
}
}
bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedInsertSwitch(); }
// Summary is supported only when loop sink is NOT needed (NeedLoopSink is the
// renamed NeedInsertSwitch; loop-sink mode keeps data on device, so summary
// ops cannot be serviced from host side).
bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedLoopSink(); }
void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const) {
@ -1285,7 +1286,8 @@ void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->AssignMemory(*kernel_graph);
device::KernelAdjust::GetInstance().AssignLoopCtrlMemory(*kernel_graph);
MS_LOG(INFO) << "Status record: end memory alloc. graph id: " << kernel_graph->graph_id();
MS_LOG(INFO) << "Status record: end memory alloc. graph id: " << kernel_graph->graph_id()
<< ", Memory Statistics:" << device::ascend::AscendMemAdapter::GetInstance().DevMemStatistics();
}
void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input_tensors,

View File

@ -31,35 +31,45 @@ bool AscendMemAdapter::Initialize() {
return true;
}
size_t free_hbm_size = 0;
rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &total_hbm_size_);
if (ret != RT_ERROR_NONE || total_hbm_size_ == 0) {
MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << total_hbm_size_;
rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &device_hbm_size_);
if (ret != RT_ERROR_NONE || device_hbm_size_ == 0) {
MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << device_hbm_size_;
}
max_hbm_size_for_ms_ = total_hbm_size_ * 15 / 16; // reserved memory is 1/16 of total
auto context_mem = GetDeviceMemSizeFromContext();
device_mem_size_ = context_mem == 0 ? max_hbm_size_for_ms_ : context_mem;
device_mem_base_ = MallocFromRts(device_mem_size_);
static_mem_offset_ = device_mem_size_;
// reserved memory for HCCL or other component
auto reserved_mem_size_for_others = device_hbm_size_ * 15 / 16;
reserved_mem_size_for_others =
reserved_mem_size_for_others >= (1 << kMemSizeGB) ? (1 << kMemSizeGB) : reserved_mem_size_for_others;
max_available_ms_hbm_size_ = device_hbm_size_ - reserved_mem_size_for_others;
auto user_define_ms_size_ = GetDeviceMemSizeFromContext();
ms_used_hbm_size_ = user_define_ms_size_ == 0 ? max_available_ms_hbm_size_ : user_define_ms_size_;
MS_LOG(INFO) << "Device HBM Size:" << device_hbm_size_
<< ", Reserved HBM size for Other Components(HCCL/rts/etc.):" << reserved_mem_size_for_others
<< ", Max available HBM Size for MindSpore:" << max_available_ms_hbm_size_
<< ", User define MindSpore HBM Size:" << user_define_ms_size_
<< ", MindSpore Used HBM Size:" << ms_used_hbm_size_;
device_mem_base_addr_ = MallocFromRts(ms_used_hbm_size_);
static_mem_offset_ = ms_used_hbm_size_;
cur_dynamic_mem_offset_ = 0;
max_dynamic_mem_offset_ = 0;
MS_LOG(INFO) << " Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
MS_LOG(INFO) << "Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
initialized_ = true;
return true;
}
bool AscendMemAdapter::DeInitialize() {
if (!initialized_) {
MS_LOG(ERROR) << " DeInitialize Ascend Memory Adapter when it is not initialize";
MS_LOG(ERROR) << "DeInitialize Ascend Memory Adapter when it is not initialize";
return false;
}
auto ret = FreeToRts(device_mem_base_);
auto ret = FreeToRts(device_mem_base_addr_);
if (ret) {
total_hbm_size_ = 0;
max_hbm_size_for_ms_ = 0;
device_mem_base_ = nullptr;
device_mem_size_ = 0;
device_hbm_size_ = 0;
max_available_ms_hbm_size_ = 0;
device_mem_base_addr_ = nullptr;
ms_used_hbm_size_ = 0;
cur_dynamic_mem_offset_ = 0;
max_dynamic_mem_offset_ = 0;
@ -79,14 +89,14 @@ uint8_t *AscendMemAdapter::MallocStaticDevMem(size_t size, std::string tag) {
std::lock_guard<std::mutex> locker(mutex_);
auto new_static_offset = static_mem_offset_ - size;
if (new_static_offset < max_dynamic_mem_offset_) {
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
<< " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics()
<< "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
MS_LOG(ERROR) << DevMemDetailInfo();
return nullptr;
}
auto memory_block_ptr = device_mem_base_ + new_static_offset;
auto memory_block_ptr = device_mem_base_addr_ + new_static_offset;
static_mem_offset_ = new_static_offset;
static_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
@ -97,14 +107,14 @@ uint8_t *AscendMemAdapter::MallocDynamicDevMem(size_t size, std::string tag) {
std::lock_guard<std::mutex> locker(mutex_);
auto new_dynamic_offset = cur_dynamic_mem_offset_ + size;
if (new_dynamic_offset > static_mem_offset_) {
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
<< " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics()
<< "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
"details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
MS_LOG(ERROR) << DevMemDetailInfo();
return nullptr;
}
auto memory_block_ptr = device_mem_base_ + cur_dynamic_mem_offset_;
auto memory_block_ptr = device_mem_base_addr_ + cur_dynamic_mem_offset_;
cur_dynamic_mem_offset_ = new_dynamic_offset;
max_dynamic_mem_offset_ = std::max(cur_dynamic_mem_offset_, max_dynamic_mem_offset_);
dynamic_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
@ -116,14 +126,12 @@ void AscendMemAdapter::ResetDynamicMemory() { cur_dynamic_mem_offset_ = 0; }
std::string AscendMemAdapter::DevMemStatistics() {
std::ostringstream oss;
oss << "\nHBM memory size: " << total_hbm_size_;
oss << "\nAvailable HBM memory size for MS: " << max_hbm_size_for_ms_;
oss << "\nMS memory base size: " << device_mem_size_;
oss << "\nMS memory base address: " << reinterpret_cast<void *>(device_mem_base_);
oss << "\nStatic Memory size: " << device_mem_size_ - static_mem_offset_;
oss << "\nDevice HBM memory size: " << device_hbm_size_;
oss << "\nMindSpore Used memory size: " << ms_used_hbm_size_;
oss << "\nMindSpore memory base address: " << reinterpret_cast<void *>(device_mem_base_addr_);
oss << "\nTotal Static Memory size: " << ms_used_hbm_size_ - static_mem_offset_;
oss << "\nTotal Dynamic memory size: " << max_dynamic_mem_offset_;
oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_;
oss << "\nMAX Dynamic memory size of all graph: " << max_dynamic_mem_offset_;
oss << "\nMS Static memory offset: " << static_mem_offset_;
oss << std::endl;
return oss.str();
}
@ -161,9 +169,9 @@ size_t AscendMemAdapter::GetDeviceMemSizeFromContext() {
auto gb_var = std::stoull(gb_str);
MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
auto max_hbm_size_for_ms_GB = max_hbm_size_for_ms_ >> kMemSizeGB;
auto max_hbm_size_for_ms_GB = max_available_ms_hbm_size_ >> kMemSizeGB;
if (gb_var > max_hbm_size_for_ms_GB || gb_var == 0) {
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (total_hbm_size_ >> kMemSizeGB)
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (device_hbm_size_ >> kMemSizeGB)
<< " GB, variable_memory_max_size should be in range (0-" << max_hbm_size_for_ms_GB
<< "]GB, but got " << gb_var
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";

View File

@ -43,8 +43,7 @@ class AscendMemAdapter {
void ResetDynamicMemory();
uint64_t FreeDevMemSize() { return static_mem_offset_ - max_dynamic_mem_offset_; }
uint64_t TotalDevMemSize() { return device_mem_size_; }
uint64_t MaxHbmSizeForMs() { return max_hbm_size_for_ms_; }
uint64_t MaxHbmSizeForMs() { return max_available_ms_hbm_size_; }
std::string DevMemStatistics();
std::string DevMemDetailInfo();
@ -72,10 +71,10 @@ class AscendMemAdapter {
std::mutex mutex_;
// rts Memory INFO
size_t total_hbm_size_{0};
size_t max_hbm_size_for_ms_{0};
uint8_t *device_mem_base_{nullptr};
uint64_t device_mem_size_{0};
size_t device_hbm_size_{0};
size_t max_available_ms_hbm_size_{0};
uint8_t *device_mem_base_addr_{nullptr};
uint64_t ms_used_hbm_size_{0};
// dynamic memory info
uint64_t cur_dynamic_mem_offset_{0};

View File

@ -300,7 +300,7 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
}
void AscendStreamAssign::SetLoopSink() {
if (KernelAdjust::NeedInsertSwitch()) {
if (KernelAdjust::NeedLoopSink()) {
loop_sink_ = true;
} else {
loop_sink_ = false;

View File

@ -63,7 +63,7 @@ void KernelAdjust::ReorderGetNext(const std::shared_ptr<session::KernelGraph> &k
kernel_graph_ptr->set_execution_order(new_order_list);
}
bool KernelAdjust::NeedInsertSwitch() {
bool KernelAdjust::NeedLoopSink() {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
return (context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK) &&
@ -396,7 +396,7 @@ void KernelAdjust::ProcessLoopSink(const std::shared_ptr<session::KernelGraph> &
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
device::ascend::AscendStreamMng &resource_manager = device::ascend::AscendStreamMng::GetInstance();
resource_manager.ResetResource();
if (!NeedInsertSwitch()) {
if (!NeedLoopSink()) {
return;
}
if (kernel_graph_ptr->is_dynamic_shape()) {
@ -954,7 +954,7 @@ void KernelAdjust::InsertDeviceLoopCtrl(const std::shared_ptr<session::KernelGra
// constant loop num in epoch tensor
int32_t initial_value = 0;
if (NeedInsertSwitch()) {
if (NeedLoopSink()) {
initial_value = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
} else {
MS_LOG(INFO) << "Tensor const_loop_num_in_epoch only used in loop sink mode.";
@ -1009,6 +1009,10 @@ void KernelAdjust::AssignLoopCtrlTensorMem(const session::KernelGraph &kernel_gr
}
void KernelAdjust::AssignLoopCtrlMemory(const session::KernelGraph &kernel_graph_ptr) {
auto device_loop_control_tensors = kernel_graph_ptr.device_loop_control_tensors();
if (device_loop_control_tensors.empty()) {
return;
}
MS_LOG(INFO) << "Assign device loop control memory";
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
@ -1046,6 +1050,10 @@ void KernelAdjust::SetDeviceLoopCtrlTensor(const std::shared_ptr<session::Kernel
void KernelAdjust::LoadDeviceLoopCtrlParameters(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
auto device_loop_control_tensors = kernel_graph_ptr->device_loop_control_tensors();
if (device_loop_control_tensors.empty()) {
return;
}
MS_LOG(INFO) << "Load device loop control data";
SetDeviceLoopCtrlTensor(kernel_graph_ptr, kCurLoopCountName, 0);
SetDeviceLoopCtrlTensor(kernel_graph_ptr, kNextLoopCountName, 0);

View File

@ -68,7 +68,7 @@ class KernelAdjust {
#ifndef ENABLE_SECURITY
void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr);
#endif
static bool NeedInsertSwitch();
static bool NeedLoopSink();
CNodePtr CreateStreamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
private:

View File

@ -38,7 +38,7 @@ void KernelAdjust::LoadDeviceLoopCtrlParameters(const std::shared_ptr<session::K
return;
}
bool KernelAdjust::NeedInsertSwitch() { return true; }
// NOTE(review): appears to be a stub build variant of KernelAdjust (the sibling
// ProcessLoopSink below is a no-op) — unconditionally reports that loop sink is
// needed; confirm against the full implementation, which checks MS_CTX_ENABLE_TASK_SINK.
bool KernelAdjust::NeedLoopSink() { return true; }
// No-op loop-sink processing: this variant performs no graph modification
// (presumably a stub for builds without the real Ascend backend — TODO confirm).
void KernelAdjust::ProcessLoopSink(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }