forked from mindspore-Ecosystem/mindspore

log improvement

commit dc7988f4bd (parent 1639a33c07)
@@ -32,6 +32,7 @@
 #include "runtime/device/ascend/kernel_build_ascend.h"
 #include "runtime/device/ascend/ascend_kernel_runtime.h"
 #include "runtime/device/ascend/profiling/profiling_manager.h"
+#include "runtime/device/ascend/ascend_memory_adapter.h"
 #include "backend/optimizer/ascend/ascend_backend_optimization.h"
 #include "backend/optimizer/common/common_backend_optimization.h"
 #include "runtime/device/kernel_adjust.h"
@@ -586,7 +587,7 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
   }
 }
 
-bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedInsertSwitch(); }
+bool AscendSession::IsSupportSummary() { return !device::KernelAdjust::NeedLoopSink(); }
 
 void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
                                     const std::vector<tensor::TensorPtr> &inputs, VectorRef *const) {
@@ -1285,7 +1286,8 @@ void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(runtime_instance);
   runtime_instance->AssignMemory(*kernel_graph);
   device::KernelAdjust::GetInstance().AssignLoopCtrlMemory(*kernel_graph);
-  MS_LOG(INFO) << "Status record: end memory alloc. graph id: " << kernel_graph->graph_id();
+  MS_LOG(INFO) << "Status record: end memory alloc. graph id: " << kernel_graph->graph_id()
+               << ", Memory Statistics:" << device::ascend::AscendMemAdapter::GetInstance().DevMemStatistics();
 }
 
 void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input_tensors,
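Note: with the hunk above, the end-of-memory-alloc status record now also carries the adapter statistics. Using the DevMemStatistics() format introduced later in this commit and purely hypothetical numbers, the resulting INFO record would look roughly like:

Status record: end memory alloc. graph id: 0, Memory Statistics:
Device HBM memory size: 34359738368
MindSpore Used memory size: 33285996544
MindSpore memory base address: 0x12c180000000
Total Static Memory size: 1073741824
Total Dynamic memory size: 536870912
Dynamic memory size of this graph: 536870912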
@@ -31,35 +31,45 @@ bool AscendMemAdapter::Initialize() {
     return true;
   }
   size_t free_hbm_size = 0;
-  rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &total_hbm_size_);
-  if (ret != RT_ERROR_NONE || total_hbm_size_ == 0) {
-    MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << total_hbm_size_;
+  rtError_t ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_hbm_size, &device_hbm_size_);
+  if (ret != RT_ERROR_NONE || device_hbm_size_ == 0) {
+    MS_LOG(EXCEPTION) << "Get Device HBM memory size failed, ret = " << ret << ", total HBM size :" << device_hbm_size_;
   }
 
-  max_hbm_size_for_ms_ = total_hbm_size_ * 15 / 16;  // reserved memory is 1/16 of total
-  auto context_mem = GetDeviceMemSizeFromContext();
-  device_mem_size_ = context_mem == 0 ? max_hbm_size_for_ms_ : context_mem;
-  device_mem_base_ = MallocFromRts(device_mem_size_);
-  static_mem_offset_ = device_mem_size_;
+  // reserved memory for HCCL or other component
+  auto reserved_mem_size_for_others = device_hbm_size_ * 15 / 16;
+  reserved_mem_size_for_others =
+      reserved_mem_size_for_others >= (1 << kMemSizeGB) ? (1 << kMemSizeGB) : reserved_mem_size_for_others;
+  max_available_ms_hbm_size_ = device_hbm_size_ - reserved_mem_size_for_others;
+
+  auto user_define_ms_size_ = GetDeviceMemSizeFromContext();
+  ms_used_hbm_size_ = user_define_ms_size_ == 0 ? max_available_ms_hbm_size_ : user_define_ms_size_;
+  MS_LOG(INFO) << "Device HBM Size:" << device_hbm_size_
+               << ", Reserved HBM size for Other Components(HCCL/rts/etc.):" << reserved_mem_size_for_others
+               << ", Max available HBM Size for MindSpore:" << max_available_ms_hbm_size_
+               << ", User define MindSpore HBM Size:" << user_define_ms_size_
+               << ", MindSpore Used HBM Size:" << ms_used_hbm_size_;
+  device_mem_base_addr_ = MallocFromRts(ms_used_hbm_size_);
+  static_mem_offset_ = ms_used_hbm_size_;
   cur_dynamic_mem_offset_ = 0;
   max_dynamic_mem_offset_ = 0;
-  MS_LOG(INFO) << " Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
+  MS_LOG(INFO) << "Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
   initialized_ = true;
   return true;
 }
 
 bool AscendMemAdapter::DeInitialize() {
   if (!initialized_) {
-    MS_LOG(ERROR) << " DeInitialize Ascend Memory Adapter when it is not initialize";
+    MS_LOG(ERROR) << "DeInitialize Ascend Memory Adapter when it is not initialize";
     return false;
   }
 
-  auto ret = FreeToRts(device_mem_base_);
+  auto ret = FreeToRts(device_mem_base_addr_);
   if (ret) {
-    total_hbm_size_ = 0;
-    max_hbm_size_for_ms_ = 0;
-    device_mem_base_ = nullptr;
-    device_mem_size_ = 0;
+    device_hbm_size_ = 0;
+    max_available_ms_hbm_size_ = 0;
+    device_mem_base_addr_ = nullptr;
+    ms_used_hbm_size_ = 0;
 
     cur_dynamic_mem_offset_ = 0;
     max_dynamic_mem_offset_ = 0;
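Note: a minimal standalone sketch of the new reservation arithmetic above, assuming a hypothetical 32 GiB HBM device and assuming kMemSizeGB is the bit width of one GiB (30), which is how the surrounding shifts appear to use it; it mirrors only the formula in this hunk, not the real adapter.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  constexpr uint64_t kMemSizeGB = 30;              // assumption: shift count, 1ULL << 30 == 1 GiB
  uint64_t device_hbm_size = 32ULL << kMemSizeGB;  // hypothetical 32 GiB device
  // Reserve 15/16 of HBM for other components, but cap the reservation at 1 GiB (as in the hunk above).
  uint64_t reserved = device_hbm_size * 15 / 16;
  reserved = std::min<uint64_t>(reserved, 1ULL << kMemSizeGB);
  uint64_t max_available_for_ms = device_hbm_size - reserved;
  std::cout << "reserved GiB: " << (reserved >> kMemSizeGB)                               // 1
            << ", available for MindSpore GiB: " << (max_available_for_ms >> kMemSizeGB)  // 31
            << std::endl;
  return 0;
}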
@@ -79,14 +89,14 @@ uint8_t *AscendMemAdapter::MallocStaticDevMem(size_t size, std::string tag) {
   std::lock_guard<std::mutex> locker(mutex_);
   auto new_static_offset = static_mem_offset_ - size;
   if (new_static_offset < max_dynamic_mem_offset_) {
-    MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
-                  << " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
+    MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics()
+                  << "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
                      "details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
     MS_LOG(ERROR) << DevMemDetailInfo();
     return nullptr;
   }
 
-  auto memory_block_ptr = device_mem_base_ + new_static_offset;
+  auto memory_block_ptr = device_mem_base_addr_ + new_static_offset;
   static_mem_offset_ = new_static_offset;
   static_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
 
@@ -97,14 +107,14 @@ uint8_t *AscendMemAdapter::MallocDynamicDevMem(size_t size, std::string tag) {
   std::lock_guard<std::mutex> locker(mutex_);
   auto new_dynamic_offset = cur_dynamic_mem_offset_ + size;
   if (new_dynamic_offset > static_mem_offset_) {
-    MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << " Memory Statistic:" << DevMemStatistics()
-                  << " failed! Please try to reduce 'batch_size' or check whether exists extra large shape. More "
+    MS_LOG(ERROR) << "Out of Memory!!! Request memory size: " << size << ", Memory Statistic:" << DevMemStatistics()
+                  << "Please try to reduce 'batch_size' or check whether exists extra large shape. More "
                      "details can be found in MindSpore's FAQ with keyword 'Out of Memory'.";
     MS_LOG(ERROR) << DevMemDetailInfo();
     return nullptr;
   }
 
-  auto memory_block_ptr = device_mem_base_ + cur_dynamic_mem_offset_;
+  auto memory_block_ptr = device_mem_base_addr_ + cur_dynamic_mem_offset_;
   cur_dynamic_mem_offset_ = new_dynamic_offset;
   max_dynamic_mem_offset_ = std::max(cur_dynamic_mem_offset_, max_dynamic_mem_offset_);
   dynamic_memory_block_list_.push_back(std::make_shared<MemoryBlock>(memory_block_ptr, size, tag));
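Note: the two Malloc hunks above implement a two-ended scheme over one device region: static blocks are carved downward from the top, dynamic blocks grow upward from offset 0, and a request fails once the cursors would cross. A simplified, self-contained sketch with illustrative names (the real adapter compares the static cursor against the high-water mark max_dynamic_mem_offset_ rather than the current dynamic offset):

#include <cstdint>
#include <iostream>

// Illustrative cursors over a single device region of `capacity` bytes.
struct TwoEndedArena {
  uint64_t capacity;
  uint64_t static_offset;       // grows downward, starts at capacity
  uint64_t cur_dynamic_offset;  // grows upward, starts at 0

  explicit TwoEndedArena(uint64_t cap) : capacity(cap), static_offset(cap), cur_dynamic_offset(0) {}

  // Mirrors MallocStaticDevMem: fail if the static cursor would cross the dynamic region.
  bool MallocStatic(uint64_t size) {
    if (static_offset < size || static_offset - size < cur_dynamic_offset) return false;
    static_offset -= size;
    return true;
  }

  // Mirrors MallocDynamicDevMem: fail if the dynamic cursor would cross the static region.
  bool MallocDynamic(uint64_t size) {
    if (cur_dynamic_offset + size > static_offset) return false;
    cur_dynamic_offset += size;
    return true;
  }
};

int main() {
  TwoEndedArena arena(1000);
  std::cout << arena.MallocStatic(300)   // 1: static_offset 1000 -> 700
            << arena.MallocDynamic(600)  // 1: cur_dynamic_offset 0 -> 600
            << arena.MallocDynamic(200)  // 0: 600 + 200 > 700, out of memory
            << std::endl;                // prints 110
  return 0;
}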
@@ -116,14 +126,12 @@ void AscendMemAdapter::ResetDynamicMemory() { cur_dynamic_mem_offset_ = 0; }
 
 std::string AscendMemAdapter::DevMemStatistics() {
   std::ostringstream oss;
-  oss << "\nHBM memory size: " << total_hbm_size_;
-  oss << "\nAvailable HBM memory size for MS: " << max_hbm_size_for_ms_;
-  oss << "\nMS memory base size: " << device_mem_size_;
-  oss << "\nMS memory base address: " << reinterpret_cast<void *>(device_mem_base_);
-  oss << "\nStatic Memory size: " << device_mem_size_ - static_mem_offset_;
+  oss << "\nDevice HBM memory size: " << device_hbm_size_;
+  oss << "\nMindSpore Used memory size: " << ms_used_hbm_size_;
+  oss << "\nMindSpore memory base address: " << reinterpret_cast<void *>(device_mem_base_addr_);
+  oss << "\nTotal Static Memory size: " << ms_used_hbm_size_ - static_mem_offset_;
+  oss << "\nTotal Dynamic memory size: " << max_dynamic_mem_offset_;
   oss << "\nDynamic memory size of this graph: " << cur_dynamic_mem_offset_;
-  oss << "\nMAX Dynamic memory size of all graph: " << max_dynamic_mem_offset_;
-  oss << "\nMS Static memory offset: " << static_mem_offset_;
   oss << std::endl;
   return oss.str();
 }
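Note: a reading of how the renamed statistics above relate to the allocator state, checked with hypothetical numbers. "Total Static Memory size" is how far the static cursor has moved down, "Total Dynamic memory size" is the dynamic high-water mark, and the remainder is the free headroom reported by FreeDevMemSize() in the header below.

#include <cassert>
#include <cstdint>

// Illustrative check: static usage + dynamic high-water mark + free headroom
// should cover the whole MindSpore region (ms_used_hbm_size_).
void CheckPartition(uint64_t ms_used_hbm_size, uint64_t static_mem_offset, uint64_t max_dynamic_mem_offset) {
  uint64_t total_static = ms_used_hbm_size - static_mem_offset;     // "Total Static Memory size"
  uint64_t total_dynamic = max_dynamic_mem_offset;                  // "Total Dynamic memory size"
  uint64_t free_size = static_mem_offset - max_dynamic_mem_offset;  // FreeDevMemSize() in the header
  assert(total_static + total_dynamic + free_size == ms_used_hbm_size);
}

int main() {
  CheckPartition(1000, 700, 600);  // 300 static + 600 dynamic + 100 free == 1000
  return 0;
}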
@@ -161,9 +169,9 @@ size_t AscendMemAdapter::GetDeviceMemSizeFromContext() {
   auto gb_var = std::stoull(gb_str);
   MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
 
-  auto max_hbm_size_for_ms_GB = max_hbm_size_for_ms_ >> kMemSizeGB;
+  auto max_hbm_size_for_ms_GB = max_available_ms_hbm_size_ >> kMemSizeGB;
   if (gb_var > max_hbm_size_for_ms_GB || gb_var == 0) {
-    MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (total_hbm_size_ >> kMemSizeGB)
+    MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << (device_hbm_size_ >> kMemSizeGB)
                       << " GB, variable_memory_max_size should be in range (0-" << max_hbm_size_for_ms_GB
                       << "]GB, but got " << gb_var
                       << "GB, please set the context key 'variable_memory_max_size' in valid range.";
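Note: kMemSizeGB is used both as "1 << kMemSizeGB" and as a right shift from bytes, so it is presumably the bit width of one GiB (30) rather than a byte count. Under that assumption, a sketch of the range check above with an illustrative 31 GiB budget (hypothetical helper name, not MindSpore's API):

#include <cstdint>
#include <stdexcept>
#include <string>

constexpr uint64_t kMemSizeGB = 30;  // assumption: 1ULL << 30 bytes == 1 GiB

// Mirrors the check in GetDeviceMemSizeFromContext(): the user-supplied
// variable_memory_max_size (in GB) must lie in (0, max_available_ms_hbm_size].
uint64_t ParseVariableMemoryMaxSize(const std::string &gb_str, uint64_t max_available_ms_hbm_size) {
  uint64_t gb_var = std::stoull(gb_str);
  uint64_t max_gb = max_available_ms_hbm_size >> kMemSizeGB;  // bytes -> GiB
  if (gb_var > max_gb || gb_var == 0) {
    throw std::runtime_error("variable_memory_max_size should be in range (0-" + std::to_string(max_gb) + "]GB");
  }
  return gb_var << kMemSizeGB;  // GiB -> bytes
}

int main() {
  uint64_t bytes = ParseVariableMemoryMaxSize("16", 31ULL << kMemSizeGB);
  return bytes == (16ULL << kMemSizeGB) ? 0 : 1;
}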
@@ -43,8 +43,7 @@ class AscendMemAdapter {
   void ResetDynamicMemory();
 
   uint64_t FreeDevMemSize() { return static_mem_offset_ - max_dynamic_mem_offset_; }
-  uint64_t TotalDevMemSize() { return device_mem_size_; }
-  uint64_t MaxHbmSizeForMs() { return max_hbm_size_for_ms_; }
+  uint64_t MaxHbmSizeForMs() { return max_available_ms_hbm_size_; }
 
   std::string DevMemStatistics();
   std::string DevMemDetailInfo();
@@ -72,10 +71,10 @@ class AscendMemAdapter {
   std::mutex mutex_;
 
   // rts Memory INFO
-  size_t total_hbm_size_{0};
-  size_t max_hbm_size_for_ms_{0};
-  uint8_t *device_mem_base_{nullptr};
-  uint64_t device_mem_size_{0};
+  size_t device_hbm_size_{0};
+  size_t max_available_ms_hbm_size_{0};
+  uint8_t *device_mem_base_addr_{nullptr};
+  uint64_t ms_used_hbm_size_{0};
 
   // dynamic memory info
   uint64_t cur_dynamic_mem_offset_{0};
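Note: for orientation, the renames threaded through the hunks above and below (old name on the left, all visible in this diff):

  total_hbm_size_                  -> device_hbm_size_
  max_hbm_size_for_ms_             -> max_available_ms_hbm_size_
  device_mem_base_                 -> device_mem_base_addr_
  device_mem_size_                 -> ms_used_hbm_size_  (the TotalDevMemSize() accessor is removed)
  KernelAdjust::NeedInsertSwitch() -> KernelAdjust::NeedLoopSink()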
@@ -300,7 +300,7 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
 }
 
 void AscendStreamAssign::SetLoopSink() {
-  if (KernelAdjust::NeedInsertSwitch()) {
+  if (KernelAdjust::NeedLoopSink()) {
     loop_sink_ = true;
   } else {
     loop_sink_ = false;
@@ -63,7 +63,7 @@ void KernelAdjust::ReorderGetNext(const std::shared_ptr<session::KernelGraph> &k
   kernel_graph_ptr->set_execution_order(new_order_list);
 }
 
-bool KernelAdjust::NeedInsertSwitch() {
+bool KernelAdjust::NeedLoopSink() {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   return (context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK) &&
@@ -396,7 +396,7 @@ void KernelAdjust::ProcessLoopSink(const std::shared_ptr<session::KernelGraph> &
   MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
   device::ascend::AscendStreamMng &resource_manager = device::ascend::AscendStreamMng::GetInstance();
   resource_manager.ResetResource();
-  if (!NeedInsertSwitch()) {
+  if (!NeedLoopSink()) {
     return;
   }
   if (kernel_graph_ptr->is_dynamic_shape()) {
@@ -954,7 +954,7 @@ void KernelAdjust::InsertDeviceLoopCtrl(const std::shared_ptr<session::KernelGra
 
   // constant loop num in epoch tensor
   int32_t initial_value = 0;
-  if (NeedInsertSwitch()) {
+  if (NeedLoopSink()) {
     initial_value = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
   } else {
     MS_LOG(INFO) << "Tensor const_loop_num_in_epoch only used in loop sink mode.";
@@ -1009,6 +1009,10 @@ void KernelAdjust::AssignLoopCtrlTensorMem(const session::KernelGraph &kernel_gr
 }
 
 void KernelAdjust::AssignLoopCtrlMemory(const session::KernelGraph &kernel_graph_ptr) {
+  auto device_loop_control_tensors = kernel_graph_ptr.device_loop_control_tensors();
+  if (device_loop_control_tensors.empty()) {
+    return;
+  }
   MS_LOG(INFO) << "Assign device loop control memory";
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
@@ -1046,6 +1050,10 @@ void KernelAdjust::SetDeviceLoopCtrlTensor(const std::shared_ptr<session::Kernel
 
 void KernelAdjust::LoadDeviceLoopCtrlParameters(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
   MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
+  auto device_loop_control_tensors = kernel_graph_ptr->device_loop_control_tensors();
+  if (device_loop_control_tensors.empty()) {
+    return;
+  }
   MS_LOG(INFO) << "Load device loop control data";
   SetDeviceLoopCtrlTensor(kernel_graph_ptr, kCurLoopCountName, 0);
   SetDeviceLoopCtrlTensor(kernel_graph_ptr, kNextLoopCountName, 0);
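Note: both guards added above share one shape: when the graph carries no device loop-control tensors, memory assignment and parameter loading become no-ops, so the "Assign device loop control memory" / "Load device loop control data" logs no longer appear for graphs without loop sink. A generic sketch of the early-return pattern (types and names here are illustrative stand-ins, not MindSpore's API):

#include <map>
#include <string>

// Illustrative stand-in for the graph's device loop-control tensor map.
struct Graph {
  std::map<std::string, int> device_loop_control_tensors;
};

// Mirrors the added guard: graphs that never had loop-control tensors inserted are skipped entirely.
void AssignLoopCtrlMemory(const Graph &graph) {
  if (graph.device_loop_control_tensors.empty()) {
    return;  // nothing to assign, and nothing is logged
  }
  // ... assign device memory for each control tensor ...
}

int main() {
  Graph g;                  // no loop-control tensors inserted
  AssignLoopCtrlMemory(g);  // returns immediately because of the guard
  return 0;
}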
@@ -68,7 +68,7 @@ class KernelAdjust {
 #ifndef ENABLE_SECURITY
   void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr);
 #endif
-  static bool NeedInsertSwitch();
+  static bool NeedLoopSink();
   CNodePtr CreateStreamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
 
  private:
@@ -38,7 +38,7 @@ void KernelAdjust::LoadDeviceLoopCtrlParameters(const std::shared_ptr<session::K
   return;
 }
 
-bool KernelAdjust::NeedInsertSwitch() { return true; }
+bool KernelAdjust::NeedLoopSink() { return true; }
 
 void KernelAdjust::ProcessLoopSink(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
 