forked from mindspore-Ecosystem/mindspore
!10294 Add support for CPU memory reuse
From: @ythuang Reviewed-by: Signed-off-by:
This commit is contained in:
commit
05ec9352f3
|
@ -81,6 +81,14 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr
|
|||
#endif
|
||||
MS_LOG(INFO) << "Build kernel";
|
||||
BuildKernel(graph.get());
|
||||
// Set the graph execution order before memory allocation so that memory is assigned according to the reordered graph
|
||||
auto execution_order = graph->execution_order();
|
||||
Reorder(&execution_order);
|
||||
graph->set_execution_order(execution_order);
|
||||
// runtime init
|
||||
if (!runtime_.Init()) {
|
||||
MS_LOG(EXCEPTION) << "Kernel runtime init error.";
|
||||
}
|
||||
MS_LOG(INFO) << "Assign kernel address";
|
||||
runtime_.AssignKernelAddress(graph.get());
|
||||
return graph_id;
|
||||
|
@ -116,11 +124,8 @@ void CPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
|
|||
#endif
|
||||
|
||||
MS_LOG(INFO) << "Run graph start";
|
||||
auto execution_order = kernel_graph->execution_order();
|
||||
Reorder(&execution_order);
|
||||
|
||||
bool enable_summary = summary_callback_ != nullptr;
|
||||
kernel_graph->set_execution_order(execution_order);
|
||||
NamedSummaryOutputs summary_outputs;
|
||||
if (enable_summary) {
|
||||
SetSummaryNodes(kernel_graph.get());
|
||||
|
@ -181,16 +186,21 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
|
|||
auto kernel_graph = run_op_graphs_[graph_info];
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
|
||||
// Set the graph execution order before memory allocation so that memory is assigned according to the reordered graph
|
||||
auto execution_order = kernel_graph->execution_order();
|
||||
Reorder(&execution_order);
|
||||
kernel_graph->set_execution_order(execution_order);
|
||||
|
||||
// runtime init
|
||||
if (!runtime_.Init()) {
|
||||
MS_LOG(EXCEPTION) << "Kernel runtime init error.";
|
||||
}
|
||||
runtime_.AssignKernelAddress(kernel_graph.get());
|
||||
std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
|
||||
runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node);
|
||||
runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs);
|
||||
|
||||
MS_LOG(INFO) << "Run Op start";
|
||||
auto execution_order = kernel_graph->execution_order();
|
||||
Reorder(&execution_order);
|
||||
|
||||
kernel_graph->set_execution_order(execution_order);
|
||||
|
||||
bool ret = runtime_.Run(kernel_graph.get(), false);
|
||||
if (!ret) {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <exception>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "runtime/device/cpu/cpu_memory_manager.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/session/session_basic.h"
|
||||
|
@ -31,16 +32,47 @@
|
|||
#include "utils/shape_utils.h"
|
||||
#include "utils/profile.h"
|
||||
#include "utils/trace_base.h"
|
||||
#ifdef MEM_REUSE_DEBUG
|
||||
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace cpu {
|
||||
|
||||
// Prepares the runtime by creating its CPU memory manager.
// Only the first call does any work: once initialized_ is set, later calls
// fall straight through. Every path that returns yields true.
bool CPUKernelRuntime::Init() {
  if (!initialized_) {
    mem_manager_ = std::make_shared<CPUMemoryManager>();
    MS_EXCEPTION_IF_NULL(mem_manager_);
    initialized_ = true;
  }
  return true;
}
|
||||
|
||||
const size_t INIT_NODE_REF = 1;
|
||||
// Assigns device addresses for the value nodes, input nodes and kernel outputs
// of kernel_graph.
//
// Two strategies are supported, selected from the global MsContext:
//   * memory reuse enabled (MS_CTX_ENABLE_MEM_REUSE, and not kPynativeMode):
//     the manager's dynamic memory is reset and AssignDynamicMemory plans the graph;
//   * otherwise: kernel output addresses are created and CPUMemoryManager::AssignMemory
//     plans the graph.
//
// The earlier unconditional AssignKernelOutputAddress / resource_manager_.AssignMemory
// calls are gone: output addresses are now assigned only by the branch that applies.
void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
  // Both branches below dereference mem_manager_, so validate it once up front
  // instead of only on the memory-reuse path.
  MS_EXCEPTION_IF_NULL(mem_manager_);
  AssignValueNodeAddress(kernel_graph);
  AssignInputNodeAddress(kernel_graph);
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  bool is_enable_mem_reuse = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_REUSE);
  if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
    // disable mem reuse for kPynativeMode
    is_enable_mem_reuse = false;
  }
  if (is_enable_mem_reuse) {
    mem_manager_->ResetDynamicMemory();
    AssignDynamicMemory(kernel_graph);
#ifdef MEM_REUSE_DEBUG
    // Get normal graph ir for memreuse
    mindspore::memreuse::MemReuseChecker::GetInstance().CheckNormalIR(kernel_graph);
#endif
  } else {
    AssignKernelOutputAddress(kernel_graph);
    static_cast<CPUMemoryManager *>(mem_manager_.get())->AssignMemory(kernel_graph);
  }
}
|
||||
|
||||
void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph) {
|
||||
|
@ -75,7 +107,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph
|
|||
if (tensor->data_type() == output_type_id) {
|
||||
address->ptr_ = tensor->data_c();
|
||||
} else {
|
||||
address->ptr_ = resource_manager_.MemMalloc(tensor_size);
|
||||
address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(tensor_size);
|
||||
if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(),
|
||||
tensor->data_c())) {
|
||||
MS_LOG(EXCEPTION) << "Value node sync host to device failed!";
|
||||
|
@ -169,7 +201,7 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(
|
|||
size_t type_size = GetTypeByte(TypeIdToType(device_type_id));
|
||||
ShapeVector data_shape = tensor->shape();
|
||||
size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
|
||||
address->ptr_ = resource_manager_.MemMalloc(tensor_size);
|
||||
address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(tensor_size);
|
||||
tensor->set_sync_status(kNeedSyncDeviceToHostImmediately);
|
||||
} else {
|
||||
tensor->set_sync_status(kNoNeedSync);
|
||||
|
@ -269,7 +301,7 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker
|
|||
ShapeVector data_shape = tensor->shape();
|
||||
size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(),
|
||||
GetTypeByte(TypeIdToType(address->type_id_)), std::multiplies<size_t>());
|
||||
address->ptr_ = resource_manager_.MemMalloc(tensor_size);
|
||||
address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(tensor_size);
|
||||
if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(),
|
||||
tensor->data_c())) {
|
||||
MS_LOG(EXCEPTION) << "Parameter node sync host to device failed!";
|
||||
|
@ -323,7 +355,7 @@ void CPUKernelRuntime::AddRuntimeAddress(DeviceAddress *address, std::vector<ker
|
|||
kernel::AddressPtr input = std::make_shared<kernel::Address>();
|
||||
MS_EXCEPTION_IF_NULL(input);
|
||||
if (address->ptr_ == nullptr) {
|
||||
address->ptr_ = resource_manager_.MemMalloc(address->size_);
|
||||
address->ptr_ = static_cast<CPUMemoryManager *>(mem_manager_.get())->StaticMemMalloc(address->size_);
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(address->ptr_);
|
||||
input->addr = address->ptr_;
|
||||
|
@ -332,16 +364,16 @@ void CPUKernelRuntime::AddRuntimeAddress(DeviceAddress *address, std::vector<ker
|
|||
}
|
||||
|
||||
void CPUKernelRuntime::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) {
|
||||
resource_manager_.IncreaseSummaryRefCount(summary_outputs);
|
||||
static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseSummaryRefCount(summary_outputs);
|
||||
}
|
||||
|
||||
void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) {
|
||||
resource_manager_.DecreaseSummaryRefCount(summary_outputs);
|
||||
static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseSummaryRefCount(summary_outputs);
|
||||
}
|
||||
|
||||
bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
resource_manager_.IncreaseAddressRefCount(kernel_graph);
|
||||
static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph);
|
||||
|
||||
auto kernels = kernel_graph->execution_order();
|
||||
for (const auto &kernel : kernels) {
|
||||
|
@ -382,7 +414,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
|
|||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel);
|
||||
}
|
||||
resource_manager_.DecreaseAddressRefCount(kernel);
|
||||
static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseAddressRefCount(kernel);
|
||||
#ifdef ENABLE_PROFILE
|
||||
double cost_time = GetTime() - start_time;
|
||||
MS_LOG(INFO) << "cpu kernel: " << kernel->fullname_with_scope() << " costs " << cost_time * 1e6 << " us";
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "backend/session/session_basic.h"
|
||||
#include "runtime/device/cpu/cpu_resource_manager.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "utils/any.h"
|
||||
namespace mindspore {
|
||||
|
@ -35,7 +34,7 @@ class CPUKernelRuntime : public KernelRuntime {
|
|||
CPUKernelRuntime() = default;
|
||||
~CPUKernelRuntime() override = default;
|
||||
|
||||
bool Init() override { return true; }
|
||||
bool Init();
|
||||
bool Run(session::KernelGraph *graph, bool is_task_sink) override;
|
||||
void AssignKernelAddress(session::KernelGraph *kernel_graph);
|
||||
void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
|
||||
|
@ -63,9 +62,9 @@ class CPUKernelRuntime : public KernelRuntime {
|
|||
void AssignInputNodeAddress(const session::KernelGraph *kernel_graph);
|
||||
void AssignKernelOutputAddress(const session::KernelGraph *kernel_graph);
|
||||
void AddRuntimeAddress(DeviceAddress *address, std::vector<kernel::AddressPtr> *input_list);
|
||||
CPUResourceManager resource_manager_;
|
||||
std::set<DeviceAddressPtr> bound_addresses_;
|
||||
std::map<AnfNodePtr, tensor::TensorPtr> input_param_tensor_map_;
|
||||
bool initialized_{false};
|
||||
};
|
||||
} // namespace cpu
|
||||
} // namespace device
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -13,28 +13,90 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "runtime/device/cpu/cpu_resource_manager.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
#include "runtime/device/cpu/cpu_memory_manager.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/convert_utils.h"
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace cpu {
|
||||
CPUResourceManager::~CPUResourceManager() { MemFree(); }
|
||||
|
||||
void CPUResourceManager::MemFree() {
|
||||
uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool) {
|
||||
void *ptr = malloc(size);
|
||||
if (ptr != nullptr) {
|
||||
memset_s(ptr, size, 0, size);
|
||||
static_mem_[ptr] = size;
|
||||
return reinterpret_cast<uint8_t *>(ptr);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Malloc memory failed: size " << size;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t *CPUMemoryManager::MallocDynamicMem(size_t size, bool) {
|
||||
void *ptr = nullptr;
|
||||
size_t min_size = 0;
|
||||
// first find the smallest cached_mem_ which fits the size
|
||||
for (auto &&iter : cached_mem_) {
|
||||
if (iter.second >= size) {
|
||||
if (min_size == 0) {
|
||||
ptr = iter.first;
|
||||
min_size = iter.second;
|
||||
} else if (iter.second < min_size) {
|
||||
ptr = iter.first;
|
||||
min_size = iter.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ptr != nullptr) {
|
||||
memset_s(ptr, size, 0, size);
|
||||
dynamic_mem_[ptr] = min_size;
|
||||
(void)cached_mem_.erase(ptr);
|
||||
return reinterpret_cast<uint8_t *>(ptr);
|
||||
}
|
||||
// if not found, malloc
|
||||
ptr = malloc(size);
|
||||
if (ptr != nullptr) {
|
||||
memset_s(ptr, size, 0, size);
|
||||
dynamic_mem_[ptr] = size;
|
||||
return reinterpret_cast<uint8_t *>(ptr);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Malloc memory failed: size " << size;
|
||||
}
|
||||
}
|
||||
|
||||
void CPUMemoryManager::ResetDynamicMemory() {
|
||||
// don't free, for multi graph
|
||||
for (auto &&iter : dynamic_mem_) {
|
||||
cached_mem_[iter.first] = iter.second;
|
||||
}
|
||||
dynamic_mem_.clear();
|
||||
}
|
||||
|
||||
// Releases all memory still tracked by the manager when it is destroyed.
CPUMemoryManager::~CPUMemoryManager() {
  MemFree();
}
|
||||
|
||||
void CPUMemoryManager::MemFree() {
|
||||
if (mem_ptr_ != nullptr) {
|
||||
free(mem_ptr_);
|
||||
mem_ptr_ = nullptr;
|
||||
mem_size_ = 0;
|
||||
}
|
||||
|
||||
for (auto &&iter : static_mem_) {
|
||||
free(iter.first);
|
||||
}
|
||||
static_mem_.clear();
|
||||
for (auto &&iter : dynamic_mem_) {
|
||||
free(iter.first);
|
||||
}
|
||||
dynamic_mem_.clear();
|
||||
for (auto &&iter : cached_mem_) {
|
||||
free(iter.first);
|
||||
}
|
||||
cached_mem_.clear();
|
||||
}
|
||||
|
||||
void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) {
|
||||
void CPUMemoryManager::AssignMemory(const session::KernelGraph *graph) {
|
||||
size_t graph_mem_size = mem_plan_.MemPlan(graph);
|
||||
if (graph_mem_size > mem_size_) {
|
||||
if (mem_size_ > 0) {
|
||||
|
@ -43,6 +105,7 @@ void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) {
|
|||
}
|
||||
mem_ptr_ = reinterpret_cast<uint8_t *>(malloc(graph_mem_size));
|
||||
if (mem_ptr_ != nullptr) {
|
||||
MS_LOG(INFO) << "Simple MemPlan GraphMemSize [" << graph_mem_size << "]";
|
||||
mem_size_ = graph_mem_size;
|
||||
dynamic_malloc_ = false;
|
||||
} else {
|
||||
|
@ -56,26 +119,26 @@ void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) {
|
|||
mem_plan_.MemAssign(graph, mem_ptr_);
|
||||
}
|
||||
|
||||
void *CPUResourceManager::MemMalloc(size_t mem_size) {
|
||||
void *CPUMemoryManager::StaticMemMalloc(size_t mem_size) {
|
||||
void *ptr = malloc(mem_size);
|
||||
if (ptr != nullptr) {
|
||||
memset_s(ptr, mem_size, 0, mem_size);
|
||||
dynamic_mem_[ptr] = mem_size;
|
||||
static_mem_[ptr] = mem_size;
|
||||
return ptr;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Malloc memory failed: size " << mem_size;
|
||||
}
|
||||
}
|
||||
|
||||
void CPUResourceManager::MemFree(void *ptr) {
|
||||
auto iter = dynamic_mem_.find(ptr);
|
||||
if (iter != dynamic_mem_.end()) {
|
||||
(void)dynamic_mem_.erase(iter);
|
||||
void CPUMemoryManager::MemFree(void *ptr) {
|
||||
auto iter = static_mem_.find(ptr);
|
||||
if (iter != static_mem_.end()) {
|
||||
(void)static_mem_.erase(iter);
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
void CPUResourceManager::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) {
|
||||
void CPUMemoryManager::IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) {
|
||||
if (!dynamic_malloc_) {
|
||||
return;
|
||||
}
|
||||
|
@ -93,7 +156,7 @@ void CPUResourceManager::IncreaseSummaryRefCount(const session::NamedSummaryOutp
|
|||
}
|
||||
}
|
||||
|
||||
void CPUResourceManager::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) {
|
||||
void CPUMemoryManager::DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs) {
|
||||
if (!dynamic_malloc_) {
|
||||
return;
|
||||
}
|
||||
|
@ -115,7 +178,7 @@ void CPUResourceManager::DecreaseSummaryRefCount(const session::NamedSummaryOutp
|
|||
}
|
||||
}
|
||||
|
||||
void CPUResourceManager::IncreaseAddressRefCount(const session::KernelGraph *graph) {
|
||||
void CPUMemoryManager::IncreaseAddressRefCount(const session::KernelGraph *graph) {
|
||||
if (!dynamic_malloc_) {
|
||||
return;
|
||||
}
|
||||
|
@ -140,7 +203,7 @@ void CPUResourceManager::IncreaseAddressRefCount(const session::KernelGraph *gra
|
|||
}
|
||||
}
|
||||
|
||||
void CPUResourceManager::DecreaseAddressRefCount(const AnfNodePtr &kernel) {
|
||||
void CPUMemoryManager::DecreaseAddressRefCount(const AnfNodePtr &kernel) {
|
||||
if (!dynamic_malloc_) {
|
||||
return;
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -13,31 +13,40 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_
|
||||
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_MEMORY_MANAGER_H_
|
||||
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_MEMORY_MANAGER_H_
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "backend/session/session_basic.h"
|
||||
#include "runtime/device/device_address.h"
|
||||
#include "runtime/device/memory_manager.h"
|
||||
#include "runtime/device/cpu/cpu_simple_mem_plan.h"
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace cpu {
|
||||
class CPUResourceManager {
|
||||
class CPUMemoryManager : public MemoryManager {
|
||||
public:
|
||||
CPUResourceManager() = default;
|
||||
~CPUResourceManager();
|
||||
CPUMemoryManager() = default;
|
||||
virtual ~CPUMemoryManager();
|
||||
|
||||
void MallocDeviceMemory() override {}
|
||||
void FreeDeviceMemory() override {}
|
||||
void ResetDynamicMemory() override;
|
||||
|
||||
void AssignMemory(const session::KernelGraph *graph);
|
||||
void IncreaseAddressRefCount(const session::KernelGraph *graph);
|
||||
void DecreaseAddressRefCount(const AnfNodePtr &kernel);
|
||||
void *MemMalloc(size_t mem_size);
|
||||
void *StaticMemMalloc(size_t mem_size);
|
||||
void MemFree(void *ptr);
|
||||
void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
|
||||
void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
|
||||
|
||||
protected:
|
||||
uint8_t *MallocStaticMem(size_t size, bool communication_mem) override;
|
||||
uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;
|
||||
|
||||
private:
|
||||
void MemFree();
|
||||
CPUSimpleMemPlan mem_plan_;
|
||||
|
@ -46,9 +55,10 @@ class CPUResourceManager {
|
|||
uint8_t *mem_ptr_{nullptr};
|
||||
bool dynamic_malloc_{false};
|
||||
std::map<void *, size_t> dynamic_mem_;
|
||||
std::map<void *, size_t> static_mem_;
|
||||
std::map<void *, size_t> cached_mem_;
|
||||
};
|
||||
} // namespace cpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_
|
||||
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_MEMORY_MANAGER_H_
|
|
@ -28,7 +28,7 @@ namespace mindspore {
|
|||
namespace device {
|
||||
namespace cpu {
|
||||
class CPUSimpleMemPlan;
|
||||
class CPUResourceManager;
|
||||
class CPUMemoryManager;
|
||||
class CPUKernelRuntime;
|
||||
} // namespace cpu
|
||||
namespace ascend {
|
||||
|
@ -93,7 +93,7 @@ class DeviceAddress : public mindspore::DeviceSync {
|
|||
friend class MemoryManager;
|
||||
friend class mindspore::device::ascend::tasksink::TaskGenerator;
|
||||
friend class mindspore::device::cpu::CPUSimpleMemPlan;
|
||||
friend class mindspore::device::cpu::CPUResourceManager;
|
||||
friend class mindspore::device::cpu::CPUMemoryManager;
|
||||
friend class mindspore::device::cpu::CPUKernelRuntime;
|
||||
friend class mindspore::device::gpu::GPUKernelRuntime;
|
||||
friend class mindspore::device::gpu::GPUMemoryManager;
|
||||
|
|
Loading…
Reference in New Issue