!40674 add memory actor for integration of dynamic and static memory

Merge pull request !40674 from limingqi107/new_actor_runtime
This commit is contained in:
i-robot 2022-08-25 07:38:49 +00:00 committed by Gitee
commit fbb1109752
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
14 changed files with 379 additions and 6 deletions

View File

@ -1077,6 +1077,7 @@ KernelGraphPtr AnfRuntimeAlgorithm::FetchKernelGraph(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
const auto &func_graph = node->func_graph();
if (func_graph == nullptr) {
MS_LOG(ERROR) << "No association graph for node: " << node->fullname_with_scope();
return nullptr;
} else {
return func_graph->cast<KernelGraphPtr>();

View File

@ -55,6 +55,11 @@ struct SomasInfo {
size_t whole_block_size_{0};
// offset -> aligned_size_
std::map<size_t, size_t> merged_blocks_map_;
// Alloc the base address of graph during execution, which is variable.
void *base_address_{nullptr};
// The owner graph id.
uint32_t graph_id_{0};
};
using DeviceType = device::DeviceType;

View File

@ -96,7 +96,10 @@ enum class KernelTransformType {
kSendActor,
kRecvActor,
// Fusion actor type.
kFusionActor
kFusionActor,
// Memory actor type.
kMemoryAllocActor,
kMemoryFreeActor
};
#define SET_FLAG(value, flag) ((value) = ((value) | (flag)))

View File

@ -37,6 +37,7 @@ using mindspore::device::DeviceContext;
using mindspore::device::KernelInfo;
using mindspore::kernel::Address;
using mindspore::kernel::KernelLaunchInfo;
using mindspore::session::SomasInfo;
using mindspore::tensor::TensorPtr;
struct InputDataInfo {
@ -66,7 +67,10 @@ class KernelActor : public DebugAwareActor {
strategy_(strategy),
modifiable_ref_input_indexes_(modifiable_ref_input_indexes),
modifiable_ref_output_indexes_(modifiable_ref_output_indexes),
is_launch_skipped_(false) {
is_launch_skipped_(false),
somas_info_(nullptr),
memory_alloc_insert_position_({-1, false}),
memory_free_insert_position_({-1, false}) {
(void)device_contexts_.emplace_back(device_context);
}
~KernelActor() override = default;
@ -124,6 +128,7 @@ class KernelActor : public DebugAwareActor {
private:
friend class GraphScheduler;
friend class ControlNodeScheduler;
friend class SchedulerHelper;
#ifdef ENABLE_RPC_ACTOR
friend class RpcNodeScheduler;
#endif
@ -161,6 +166,13 @@ class KernelActor : public DebugAwareActor {
// Whether skip the kernel launch.
bool is_launch_skipped_;
// The information used for integration of dynamic and static memory.
SomasInfo *somas_info_;
// The first of pair is the inserted position and initial value -1 is the invalid position, the second of pair value
// true is the data arrow and value false is the control arrow.
std::pair<int32_t, bool> memory_alloc_insert_position_;
std::pair<int32_t, bool> memory_free_insert_position_;
};
using KernelActorPtr = std::shared_ptr<KernelActor>;

View File

@ -0,0 +1,66 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/graph_scheduler/actor/memory/memory_alloc_actor.h"
#include "runtime/graph_scheduler/actor/memory_manager_actor.h"
namespace mindspore {
namespace runtime {
// Initializes the actor: validates the device context and somas info, then creates a single
// device address that covers the whole somas memory block of the owner graph.
void MemoryAllocActor::Init() {
  // A usable device context (with its resource manager) is mandatory for creating device memory.
  MS_EXCEPTION_IF_CHECK_FAIL((!device_contexts_.empty()), "The device context doesn't exist.");
  MS_EXCEPTION_IF_NULL(device_contexts_[0]);
  MS_EXCEPTION_IF_NULL(device_contexts_[0]->device_res_manager_);
  MS_EXCEPTION_IF_NULL(somas_info_);
  MS_EXCEPTION_IF_CHECK_FAIL((somas_info_->whole_block_size_ != 0), "The alloc size of somas info is zero.");
  // The device pointer is nullptr for now; the memory manager actor fills it in at run time.
  // NOTE(review): "DefaultFormat"/kNumberTypeFloat16 appear to be placeholders since only the
  // raw block size matters for this address -- confirm.
  created_device_tensor_ = device_contexts_[0]->device_res_manager_->CreateDeviceAddress(
    nullptr, somas_info_->whole_block_size_, "DefaultFormat", kNumberTypeFloat16, {});
  // Register the address so the memory manager actor can allocate into it on request.
  (void)memory_alloc_list_.emplace_back(created_device_tensor_.get());
}
// Requests the whole-block memory for the graph from the memory manager actor.
// In sync mode the allocation result is processed immediately; in async mode
// the memory manager actor calls OnMemoryAllocFinish back later.
void MemoryAllocActor::SendMemoryAllocReq(OpContext<DeviceTensor> *const context) {
  MS_EXCEPTION_IF_NULL(created_device_tensor_);
  // Clear any stale pointer so the fresh allocation is recorded from a clean state.
  created_device_tensor_->set_ptr(nullptr);

  if (!ActorDispatcher::is_memory_allocation_sync()) {
    // Asynchronous path: finish notification arrives via a later message.
    ActorDispatcher::Send(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &memory_alloc_list_,
                          device_contexts_[0], context, GetAID());
    return;
  }

  // Synchronous path: block until allocation completes, then handle the result in place.
  ActorDispatcher::SendSync(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &memory_alloc_list_,
                            device_contexts_[0], context, GetAID());
  OnMemoryAllocFinish(context);
}
// Callback after the memory manager actor finished the allocation: publishes the allocated
// pointer as the somas base address of the graph, then triggers the downstream actors.
void MemoryAllocActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) {
  MS_EXCEPTION_IF_NULL(context);
  MS_EXCEPTION_IF_NULL(somas_info_);
  MS_EXCEPTION_IF_NULL(created_device_tensor_);
  // Nothing to record when the step already failed elsewhere.
  if (IsRunningFailed(context)) {
    return;
  }

  // A non-null base address means a previous allocation was never consumed/freed -- fail fast.
  if (somas_info_->base_address_ != nullptr) {
    std::string err_msg = GetAID().Name() + " already has the base address.";
    SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), err_msg);
  }

  // Hand the freshly allocated block to somas as the base address of the whole graph.
  somas_info_->base_address_ = created_device_tensor_->GetMutablePtr();
  MS_LOG(DEBUG) << GetAID().Name() << " alloc memory: " << somas_info_->base_address_;

  PostRun(context);
}
} // namespace runtime
} // namespace mindspore

View File

@ -0,0 +1,66 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_ALLOC_ACTOR_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_ALLOC_ACTOR_H_
#include <string>
#include <memory>
#include <vector>
#include "runtime/graph_scheduler/actor/memory_aware_actor.h"
namespace mindspore {
namespace runtime {
using mindspore::session::SomasInfo;
// The memory alloc actor is used to alloc memory of the whole graph at the begin of graph
// running. It allocates one contiguous block (described by the graph's SomasInfo) via the
// memory manager actor, and publishes it as the somas base address.
class MemoryAllocActor : public MemoryAwareActor {
 public:
  // somas_info: borrowed pointer describing the graph's whole-block memory; not owned here.
  // device_context: the device on which the block is allocated; stored in device_contexts_.
  MemoryAllocActor(const std::string &name, const AID &memory_manager_aid, SomasInfo *somas_info,
                   const DeviceContext *device_context)
      : MemoryAwareActor(name, KernelTransformType::kMemoryAllocActor, nullptr, memory_manager_aid),
        somas_info_(somas_info),
        created_device_tensor_(nullptr) {
    (void)device_contexts_.emplace_back(device_context);
  }
  ~MemoryAllocActor() override = default;

  // The memory related operation interface: asks the memory manager actor for the block.
  void SendMemoryAllocReq(OpContext<DeviceTensor> *const context) override;
  // The processing after memory alloc finished: records the somas base address.
  void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;

  // Get the member.
  SomasInfo *somas_info() const { return somas_info_; }

 protected:
  void Init() override;
  // Running this actor is exactly one allocation request.
  void Run(OpContext<DeviceTensor> *const context) override { SendMemoryAllocReq(context); }

 private:
  friend class SchedulerHelper;

  // Borrowed; owned by the kernel graph.
  SomasInfo *somas_info_;
  // Device address spanning the whole somas block; created in Init().
  DeviceTensorPtr created_device_tensor_;
  // Single-element list handed to the memory manager actor for allocation.
  std::vector<DeviceTensor *> memory_alloc_list_;
};
using MemoryAllocActorPtr = std::shared_ptr<MemoryAllocActor>;
} // namespace runtime
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_ALLOC_ACTOR_H_

View File

@ -0,0 +1,41 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/graph_scheduler/actor/memory/memory_free_actor.h"
#include "runtime/graph_scheduler/actor/memory_manager_actor.h"
namespace mindspore {
namespace runtime {
// Frees the graph's somas whole-block memory, then finishes the run and notifies
// downstream actors.
void MemoryFreeActor::Run(OpContext<DeviceTensor> *const context) {
  SendMemoryFreeReq(context);
  PostRun(context);
}
// Asks the memory manager actor to free the somas base address of the graph directly
// (bypassing reference counting). The pointer is reset to nullptr by the callee.
// Note: `context` is currently unused on this path; it is part of the override signature.
void MemoryFreeActor::SendMemoryFreeReq(OpContext<DeviceTensor> *const context) {
  MS_EXCEPTION_IF_NULL(somas_info_);
  MS_EXCEPTION_IF_CHECK_FAIL((!device_contexts_.empty()), "The device context doesn't exist.");
  // A non-empty vector does not guarantee a non-null entry; check before handing it on,
  // consistent with the validation done in MemoryAllocActor::Init.
  MS_EXCEPTION_IF_NULL(device_contexts_[0]);
  MS_LOG(DEBUG) << GetAID().Name() << " free memory: " << somas_info_->base_address_;
  if (ActorDispatcher::is_memory_free_sync()) {
    ActorDispatcher::SendSync(memory_manager_aid_, &MemoryManagerActor::FreeMemorydirectly, &somas_info_->base_address_,
                              device_contexts_[0]);
  } else {
    ActorDispatcher::Send(memory_manager_aid_, &MemoryManagerActor::FreeMemorydirectly, &somas_info_->base_address_,
                          device_contexts_[0]);
  }
}
} // namespace runtime
} // namespace mindspore

View File

@ -0,0 +1,58 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_FREE_ACTOR_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_FREE_ACTOR_H_
#include <string>
#include <memory>
#include "runtime/graph_scheduler/actor/memory_aware_actor.h"
namespace mindspore {
namespace runtime {
using mindspore::session::SomasInfo;
// The memory free actor is used to free memory of the whole graph at the end of graph
// running. It releases the somas base address that the matching MemoryAllocActor set.
class MemoryFreeActor : public MemoryAwareActor {
 public:
  // somas_info: borrowed pointer describing the graph's whole-block memory; not owned here.
  // device_context: the device whose memory pool the block is returned to.
  MemoryFreeActor(const std::string &name, const AID &memory_manager_aid, SomasInfo *somas_info,
                  const DeviceContext *device_context)
      : MemoryAwareActor(name, KernelTransformType::kMemoryFreeActor, nullptr, memory_manager_aid),
        somas_info_(somas_info) {
    (void)device_contexts_.emplace_back(device_context);
  }
  ~MemoryFreeActor() override = default;

  // The memory related operation interface: asks the memory manager actor to free the block.
  void SendMemoryFreeReq(OpContext<DeviceTensor> *const context) override;

  // Get the member.
  SomasInfo *somas_info() const { return somas_info_; }

 protected:
  void Run(OpContext<DeviceTensor> *const context) override;

 private:
  friend class SchedulerHelper;

  // Borrowed; owned by the kernel graph.
  SomasInfo *somas_info_;
};
using MemoryFreeActorPtr = std::shared_ptr<MemoryFreeActor>;
} // namespace runtime
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_FREE_ACTOR_H_

View File

@ -182,6 +182,14 @@ void MemoryManagerActor::FreeBatchMemory(const std::vector<DeviceTensor *> *free
}
}
// Frees device memory directly through the device resource manager, without consulting
// reference counts (only for memory that came straight from the memory pool, e.g. the
// somas whole block). Resets *free_ptr to nullptr so a double free cannot happen.
void MemoryManagerActor::FreeMemorydirectly(void **free_ptr, const DeviceContext *device_context) {
  MS_EXCEPTION_IF_NULL(free_ptr);
  MS_EXCEPTION_IF_NULL(*free_ptr);
  MS_EXCEPTION_IF_NULL(device_context);
  // Consistent with MemoryAllocActor::Init: the resource manager itself may be null.
  MS_EXCEPTION_IF_NULL(device_context->device_res_manager_);
  device_context->device_res_manager_->FreeMemory(*free_ptr);
  *free_ptr = nullptr;
}
void MemoryManagerActor::Wait(OpContext<DeviceTensor> *const op_context, const AID &from_aid) {
// Call back to the from actor to process.
ActorDispatcher::Send(from_aid, &MemoryAwareActor::OnMemoryAllocFinish, op_context);

View File

@ -59,6 +59,8 @@ class MemoryManagerActor : public ActorBase {
void FreeBatchMemory(const std::vector<DeviceTensor *> *free_list,
const std::vector<const DeviceContext *> *device_contexts,
OpContext<DeviceTensor> *const op_context, const AID &from_aid);
// Not use the ref count and free the memory directly, only for free in memory pool.
void FreeMemorydirectly(void **free_ptr, const DeviceContext *device_context);
// Wait the MemoryManagerActor to finish running all current messages.
void Wait(OpContext<DeviceTensor> *const op_context, const AID &from_aid);

View File

@ -226,7 +226,7 @@ bool SuperKernelActor::CopyInputData(const OpContext<DeviceTensor> *context) {
for (auto &device_tensor_store_key : device_tensor_store_keys_) {
auto input_device_tensor = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second.get(),
device_contexts_[0]->GetDeviceType());
// TODO: the GE backend may return a nullptr device tensor here; skip it for now instead of failing.
if (input_device_tensor == nullptr) {
continue;
}

View File

@ -115,6 +115,9 @@ void SchedulerHelper::AddDataArrow(AbstractActor *const from_actor, AbstractActo
to_actor->input_datas_num_++;
(void)to_actor->input_data_arrow_aids_.emplace_back(std::make_pair(from_actor->GetAID(), data_arrow.get()));
AddMemorySign(from_actor, to_actor, SizeToInt(from_actor->output_data_arrows_.size() - 1),
SizeToInt(to_actor->input_data_arrow_aids_.size() - 1), true);
if (from_kernel == nullptr) {
return;
}
@ -176,6 +179,9 @@ void SchedulerHelper::AddControlArrow(AbstractActor *const from_actor, AbstractA
(void)from_actor->output_control_arrows_.emplace_back(control_arrow);
to_actor->input_controls_num_++;
(void)to_actor->input_control_arrow_aids_.emplace_back(std::make_pair(from_actor->GetAID(), control_arrow.get()));
AddMemorySign(from_actor, to_actor, SizeToInt(from_actor->output_control_arrows_.size() - 1),
SizeToInt(to_actor->input_control_arrow_aids_.size() - 1), false);
}
void SchedulerHelper::AddPartialArrow(ControlActor *const from_actor, ControlActor *const to_actor, size_t from_index,
@ -462,6 +468,100 @@ void SchedulerHelper::AddArrowForFusionActor(FusionActor *fusion_actor) {
}
}
// Inspects one arrow (data or control) between two actors and, when the arrow crosses a
// kernel-graph boundary, marks the boundary kernel actors so memory alloc/free actors can
// be inserted there: the graph's somas block is allocated before its head kernel runs and
// freed after its tail kernel runs.
void SchedulerHelper::AddMemorySign(AbstractActor *const from_actor, AbstractActor *const to_actor,
                                    int32_t from_position, int32_t to_position, bool is_data_arrow) {
  MS_EXCEPTION_IF_NULL(from_actor);
  MS_EXCEPTION_IF_NULL(to_actor);
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  // The dynamic/static memory integration is disabled at memory optimize level O0.
  if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) == kOptimizeO0) {
    return;
  }
  // Only arrows touching at least one kernel actor can lie on a graph boundary.
  if ((from_actor->type() != KernelTransformType::kKernelActor) &&
      (to_actor->type() != KernelTransformType::kKernelActor)) {
    return;
  }
  // Add the somas info.
  KernelGraphPtr from_graph = nullptr;
  KernelActor *from_kernel_actor = nullptr;
  if (from_actor->type() == KernelTransformType::kKernelActor) {
    from_kernel_actor = dynamic_cast<KernelActor *>(from_actor);
    MS_EXCEPTION_IF_NULL(from_kernel_actor);
    from_graph = AnfAlgo::FetchKernelGraph(from_kernel_actor->kernel());
    MS_EXCEPTION_IF_NULL(from_graph);
    AddSomasInfo(from_kernel_actor, from_graph);
  }
  KernelGraphPtr to_graph = nullptr;
  KernelActor *to_kernel_actor = nullptr;
  if (to_actor->type() == KernelTransformType::kKernelActor) {
    to_kernel_actor = dynamic_cast<KernelActor *>(to_actor);
    MS_EXCEPTION_IF_NULL(to_kernel_actor);
    to_graph = AnfAlgo::FetchKernelGraph(to_kernel_actor->kernel());
    MS_EXCEPTION_IF_NULL(to_graph);
    AddSomasInfo(to_kernel_actor, to_graph);
  }
  // Add the memory alloc and free sign at the boundary of the graph.
  if ((from_graph != nullptr) && (to_graph != nullptr)) {
    // The same graph no need insert the memory actor.
    if (from_graph->graph_id() == to_graph->graph_id()) {
      return;
    }
    // Kernel-to-kernel arrow across two different graphs: free the source graph's memory
    // and allocate the destination graph's memory at this point.
    AddMemoryFreeSign(from_kernel_actor, from_position, from_graph, is_data_arrow);
    AddMemoryAllocSign(to_kernel_actor, to_position, to_graph, is_data_arrow);
  } else if (from_graph != nullptr) {
    // Only the source is a kernel actor: the arrow leaves its graph, so free there.
    AddMemoryFreeSign(from_kernel_actor, from_position, from_graph, is_data_arrow);
  } else if (to_graph != nullptr) {
    // Only the destination is a kernel actor: the arrow enters its graph, so alloc there.
    AddMemoryAllocSign(to_kernel_actor, to_position, to_graph, is_data_arrow);
  }
}
// Marks the head kernel actor of a graph with the arrow position where the graph's somas
// whole-block memory must be allocated before execution continues into the graph.
void SchedulerHelper::AddMemoryAllocSign(KernelActor *const to_actor, int32_t to_position,
                                         const KernelGraphPtr &to_graph, bool is_data_arrow) {
  MS_EXCEPTION_IF_NULL(to_actor);
  MS_EXCEPTION_IF_NULL(to_graph);
  // Somas does not manage this graph, so no alloc sign is needed.
  if (to_graph->somas_whole_block_size() == 0) {
    return;
  }
  // Set the memory alloc info: the insert position plus whether it refers to a data arrow.
  to_actor->memory_alloc_insert_position_ = std::make_pair(to_position, is_data_arrow);
}
// Marks the tail kernel actor of a graph with the arrow position where the graph's somas
// whole-block memory can be freed once execution leaves the graph.
void SchedulerHelper::AddMemoryFreeSign(KernelActor *const from_actor, int32_t from_position,
                                        const KernelGraphPtr &from_graph, bool is_data_arrow) {
  MS_EXCEPTION_IF_NULL(from_actor);
  MS_EXCEPTION_IF_NULL(from_graph);
  // Somas does not manage this graph, so no free sign is needed.
  if (from_graph->somas_whole_block_size() == 0) {
    return;
  }
  // Set the memory free info: the insert position plus whether it refers to a data arrow.
  from_actor->memory_free_insert_position_ = std::make_pair(from_position, is_data_arrow);
}
// Attaches the graph's somas info to the kernel actor (once), so the actor can take part
// in whole-block memory allocation/free. No-op when somas does not manage the graph.
void SchedulerHelper::AddSomasInfo(KernelActor *const kernel_actor, const KernelGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(kernel_actor);
  MS_EXCEPTION_IF_NULL(graph);
  // Skip when somas is not effective for this graph, or the actor already holds the info.
  if ((graph->somas_whole_block_size() == 0) || (kernel_actor->somas_info_ != nullptr)) {
    return;
  }
  auto somas_info = graph->MutableSomasInfo();
  MS_EXCEPTION_IF_NULL(somas_info);
  // Record the owner graph id and hand the borrowed info pointer to the actor.
  somas_info->graph_id_ = graph->graph_id();
  kernel_actor->somas_info_ = somas_info;
}
namespace {
void CheckKernelActorValid(const std::vector<KernelActorPtr> &kernel_actors) {
for (const auto &kernel_actor : kernel_actors) {

View File

@ -77,6 +77,17 @@ class SchedulerHelper {
static FusionActorPtr BuildFusionActor(const std::vector<AbstractActorPtr> &actors);
static void AddArrowForFusionActor(FusionActor *fusion_actor);
// The interface of integration of dynamic and static memory.
static void AddMemorySign(AbstractActor *const from_actor, AbstractActor *const to_actor, int32_t from_position,
int32_t to_position, bool is_data_arrow);
// Add the memory alloc sign for the head kernel actor of graph.
static void AddMemoryAllocSign(KernelActor *const to_actor, int32_t to_position, const KernelGraphPtr &to_graph,
bool is_data_arrow);
// Add the memory free sign for the tail kernel actor of graph.
static void AddMemoryFreeSign(KernelActor *const from_actor, int32_t from_position, const KernelGraphPtr &from_graph,
bool is_data_arrow);
static void AddSomasInfo(KernelActor *const kernel_actor, const KernelGraphPtr &graph);
// Check whether the actor set is valid.
static void CheckActorValid(const ActorSet *actor_set);

View File

@ -67,7 +67,8 @@ void *DeviceResManager::GetStream(size_t stream_id) const {
bool DeviceResManager::AllocateMemory(DeviceAddress *const &address) const {
MS_EXCEPTION_IF_NULL(address);
if (address->GetPtr() != nullptr) {
MS_LOG(EXCEPTION) << "Memory leak detected!";
MS_LOG(ERROR) << "Memory leak detected!";
return false;
}
auto device_ptr = AllocateMemory(address->GetSize());
@ -90,8 +91,7 @@ void DeviceResManager::FreeMemory(DeviceAddress *const &address) const {
return;
}
void *ptr = const_cast<void *>(address->GetPtr());
FreeMemory(ptr);
FreeMemory(address->GetMutablePtr());
address->set_ptr(nullptr);
}
} // namespace device