unified runtime optimize actor dump

This commit is contained in:
limingqi107 2021-10-11 10:22:56 +08:00
parent 8b42909722
commit a1ef8140a2
14 changed files with 410 additions and 304 deletions

View File

@ -96,13 +96,13 @@ void AbstractActor::SendOutput(OpContext<DeviceTensor> *const context) {
MS_EXCEPTION_IF_NULL(context);
// Must be the execution order: send result --> send data --> send control, avoid the illegal timing problem.
// 1.Send graph output result.
if (output_result_arrows_.size() != output_nodes_.size()) {
if (output_result_arrows_.size() != output_result_nodes_.size()) {
SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The size of output result arrows is not equal to the output nodes.");
}
size_t output_node_index = 0;
for (const auto &result_arrow : output_result_arrows_) {
MS_EXCEPTION_IF_NULL(result_arrow);
Async(result_arrow->to_op_id_, &OutputActor::CollectOutput, output_nodes_[output_node_index++],
Async(result_arrow->to_op_id_, &OutputActor::CollectOutput, output_result_nodes_[output_node_index++],
result_arrow->from_output_index_, result_arrow->to_input_index_, context);
}

View File

@ -53,6 +53,17 @@ class AbstractActor : public OpActor<DeviceTensor> {
// Get the position of node in the actor.
virtual size_t FetchNodePosition(const AnfNodePtr &node) const { return 0; }
// Get the member.
KernelTransformType type() const { return type_; }
const std::vector<const DeviceContext *> &device_contexts() const { return device_contexts_; }
const std::vector<AnfNodePtr> &output_result_nodes() const { return output_result_nodes_; }
const std::vector<DataArrowPtr> &output_result_arrows() const { return output_result_arrows_; }
const std::vector<std::pair<size_t, AnfNodePtr>> &device_tensor_store_keys() const {
return device_tensor_store_keys_;
}
const std::vector<AID> &input_data_arrow_aids() const { return input_data_arrow_aids_; }
const std::vector<AID> &input_control_arrow_aids() const { return input_control_arrow_aids_; }
protected:
friend class GraphScheduler;
@ -84,7 +95,7 @@ class AbstractActor : public OpActor<DeviceTensor> {
std::vector<OpDataUniquePtr<DeviceTensor>> output_data_;
// The output nodes and output result arrows of graph output.
std::vector<AnfNodePtr> output_nodes_;
std::vector<AnfNodePtr> output_result_nodes_;
std::vector<DataArrowPtr> output_result_arrows_;
// The dependent device tensor stores, the dependent expression is pair<index, AnfNode>.

View File

@ -0,0 +1,303 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/framework/actor/actor_dump.h"
namespace mindspore {
namespace runtime {
namespace {
// Dump the members shared by all actors: device contexts, device tensor store keys,
// input data/control arrow sources, output data/control arrows and graph output result arrows.
// Output format (tabs and trailing spaces) is kept identical to the legacy GraphScheduler dump.
void DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  // Hoist each accessor result into a const reference so the container is fetched only once.
  const auto &device_contexts = actor->device_contexts();
  if (device_contexts.size() > 0) {
    ofs << "\t\tdevice_contexts:" << device_contexts.size() << "\n ";
    for (const auto &device_context : device_contexts) {
      if (device_context == nullptr) {
        // A null context is still recorded so the slot count stays visible in the dump.
        ofs << "\t\t\tdevice_context:" << device_context << "\n";
        continue;
      }
      ofs << "\t\t\tdevice_context:" << device_context->device_context_key().ToString() << "\n";
    }
  }

  const auto &device_tensor_store_keys = actor->device_tensor_store_keys();
  if (device_tensor_store_keys.size() > 0) {
    ofs << "\t\tdevice_tensor_store_keys:" << device_tensor_store_keys.size() << "\n ";
    for (const auto &device_tensor_store_key : device_tensor_store_keys) {
      MS_EXCEPTION_IF_NULL(device_tensor_store_key.second);
      ofs << "\t\t\tto_input_index:" << device_tensor_store_key.first
          << "\tfrom_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n";
    }
  }

  const auto &input_data_arrow_aids = actor->input_data_arrow_aids();
  if (input_data_arrow_aids.size() > 0) {
    ofs << "\t\tinput_data_arrow_actors:" << input_data_arrow_aids.size() << "\n ";
    for (const auto &input_data_arrow_aid : input_data_arrow_aids) {
      ofs << "\t\t\tfrom_actor_name:" << input_data_arrow_aid.Name() << "\n";
    }
  }

  const auto &input_control_arrow_aids = actor->input_control_arrow_aids();
  if (input_control_arrow_aids.size() > 0) {
    ofs << "\t\tinput_control_arrow_actors:" << input_control_arrow_aids.size() << "\n ";
    for (const auto &input_control_arrow_aid : input_control_arrow_aids) {
      ofs << "\t\t\tfrom_actor_name:" << input_control_arrow_aid.Name() << "\n";
    }
  }

  const auto &output_data_arrows = actor->output_data_arrows();
  if (output_data_arrows.size() > 0) {
    ofs << "\t\toutput_data_arrows:" << output_data_arrows.size() << "\n ";
    for (const auto &data_arrow : output_data_arrows) {
      MS_EXCEPTION_IF_NULL(data_arrow);
      ofs << "\t\t\tfrom_output_index:" << data_arrow->from_output_index_
          << "\tto_actor_name:" << data_arrow->to_op_id_.Name() << "\tto_input_index:" << data_arrow->to_input_index_
          << "\n";
    }
  }

  const auto &output_control_arrows = actor->output_control_arrows();
  if (output_control_arrows.size() > 0) {
    ofs << "\t\toutput_control_arrows:" << output_control_arrows.size() << "\n ";
    for (const auto &aid : output_control_arrows) {
      ofs << "\t\t\tto_actor_name:" << aid.Name() << "\n";
    }
  }

  // The result arrows and result nodes are parallel vectors; a mismatch indicates a linking bug.
  const auto &output_result_arrows = actor->output_result_arrows();
  const auto &output_result_nodes = actor->output_result_nodes();
  if (output_result_arrows.size() != output_result_nodes.size()) {
    MS_LOG(EXCEPTION) << "The size of output result arrows is not equal to the output nodes.";
  }
  if (output_result_arrows.size() > 0) {
    ofs << "\t\toutput_result_arrows:" << output_result_arrows.size() << "\n ";
    for (size_t i = 0; i < output_result_arrows.size(); ++i) {
      // Take references to avoid copying the shared pointers on every iteration.
      const auto &result_arrow = output_result_arrows[i];
      const auto &output_node = output_result_nodes[i];
      MS_EXCEPTION_IF_NULL(result_arrow);
      MS_EXCEPTION_IF_NULL(output_node);
      ofs << "\t\t\tfrom_output_node:" << output_node->fullname_with_scope()
          << "\tfrom_output_index:" << result_arrow->from_output_index_
          << "\tto_actor_name:" << result_arrow->to_op_id_.Name()
          << "\toutput_node_position:" << result_arrow->to_input_index_ << "\n";
    }
  }
}
// Dump a data source actor. Depending on the concrete type, dumps either the device queue
// kernel with its output device tensors, or the host queue data nodes, then the common members.
void DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &actor_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << actor_name << "\n";

  if (actor->type() == KernelTransformType::kDeviceDataSourceActor) {
    // Dump the member info of device queue data source actor.
    const auto &device_queue_ds_actor = dynamic_cast<const DeviceQueueDataSourceActor *>(actor);
    MS_EXCEPTION_IF_NULL(device_queue_ds_actor);
    const auto &data_kernel = device_queue_ds_actor->data_kernel();
    MS_EXCEPTION_IF_NULL(data_kernel);
    // Hoist the loop-invariant output number instead of recomputing it in the loop condition.
    const size_t output_num = AnfAlgo::GetOutputTensorNum(data_kernel);
    ofs << "\t\tdata_kernel_name:" << data_kernel->fullname_with_scope()
        << "\tinput_number:" << AnfAlgo::GetInputTensorNum(data_kernel)
        << "\toutput_number:" << output_num << "\n";
    for (size_t i = 0; i < output_num; ++i) {
      const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_kernel, i, false);
      MS_EXCEPTION_IF_NULL(device_tensor);
      ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
          << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
    }
  } else if (actor->type() == KernelTransformType::kHostDataSourceActor) {
    // Dump the member info of host queue data source actor.
    const auto &host_queue_ds_actor = dynamic_cast<const HostQueueDataSourceActor *>(actor);
    MS_EXCEPTION_IF_NULL(host_queue_ds_actor);
    const auto &data_nodes = host_queue_ds_actor->data_nodes();
    ofs << "\t\tdata_nodes:" << data_nodes.size() << "\n";
    for (size_t i = 0; i < data_nodes.size(); ++i) {
      const auto &data_node = data_nodes[i];
      MS_EXCEPTION_IF_NULL(data_node);
      const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_node, 0, false);
      MS_EXCEPTION_IF_NULL(device_tensor);
      ofs << "\t\t\tnode_order_number:" << i << "\tnode_name:" << data_node->fullname_with_scope()
          << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
          << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n";
    }
  }

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a kernel actor: the kernel name, input/output counts, each output device tensor,
// then the common abstract actor members.
void DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";

  const auto &kernel = actor->kernel();
  MS_EXCEPTION_IF_NULL(kernel);
  // Hoist the loop-invariant output number instead of recomputing it every iteration.
  const size_t output_num = AnfAlgo::GetOutputTensorNum(kernel);
  ofs << "\t\tkernel_name:" << kernel->fullname_with_scope() << "\tinputs_num:" << AnfAlgo::GetInputTensorNum(kernel)
      << "\toutputs_num:" << output_num << "\n";
  for (size_t i = 0; i < output_num; ++i) {
    const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(kernel, i, false);
    MS_EXCEPTION_IF_NULL(device_tensor);
    ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
        << "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
  }

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a super kernel actor: its kernel graph summary followed by the common actor members.
void DumpSuperKernelActor(const SuperKernelActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";

  const auto &graph = actor->graph();
  MS_EXCEPTION_IF_NULL(graph);
  // Fix the dump label typo: "graphl_name" -> "graph_name".
  ofs << "\t\tgraph_id:" << graph->graph_id() << "\tgraph_name:" << graph->ToString()
      << "\tis_sink:" << graph->is_sink() << "\tinputs_num:" << (graph->input_nodes()).size()
      << "\tkernels_num:" << (graph->execution_order()).size() << "\n";

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a copy actor: its output device tensor (if allocated) and the common actor members.
void DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";

  // The output may not exist yet; only dump it when present.
  const auto &copy_output = actor->output();
  if (copy_output != nullptr) {
    ofs << "\t\toutput_index:" << 0 << "\tptr:" << copy_output->GetPtr() << "\tsize:" << copy_output->GetSize()
        << "\toriginal_ref_count:" << copy_output->original_ref_count() << "\n ";
  }

  DumpAbstractActor(actor, ofs);
  ofs << "\n";
}
// Dump a gather actor. Only the actor name is recorded for now.
void DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &gather_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << gather_name << '\n';
}
// Dump a switch actor. Only the actor name is recorded for now.
void DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &switch_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << switch_name << '\n';
}
} // namespace
// Dump the data prepare actor section. A nullptr actor is reported with a count of 0 and skipped.
void DumpDataPrepareActor(const DataPrepareActorPtr &actor, std::ofstream &ofs) {
  ofs << "\n\n[Data prepare actor:" << (actor != nullptr ? 1 : 0) << "]\n";
  if (actor == nullptr) {
    return;
  }
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
  DumpAbstractActor(actor.get(), ofs);

  const auto &continuous_memory_nodes = actor->continuous_memory_nodes();
  ofs << "\t\tcontinuous_memory_nodes:" << continuous_memory_nodes.size() << "\n ";
  // Key is pair<node, device context>, value is pair<is_input_need, is_output_need>.
  for (const auto &[node_with_context, need_flags] : continuous_memory_nodes) {
    MS_EXCEPTION_IF_NULL(node_with_context.first);
    MS_EXCEPTION_IF_NULL(node_with_context.second);
    ofs << "\t\t\tnode_name:" << node_with_context.first->fullname_with_scope()
        << "\tdevice_context:" << node_with_context.second->device_context_key().ToString()
        << "\tis_input_need:" << need_flags.first << "\tis_output_need:" << need_flags.second << "\n";
  }
}
// Dump the loop count actor section. A nullptr actor is reported with a count of 0 and skipped.
void DumpLoopCountActor(const LoopCountActorPtr &actor, std::ofstream &ofs) {
  ofs << "\n\n[Loop count actor:" << (actor != nullptr ? 1 : 0) << "]\n";
  if (actor == nullptr) {
    return;
  }
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count() << "\n";
  DumpAbstractActor(actor.get(), ofs);

  // The loop count actor always sends exactly two control arrows:
  // one to the output actor and one to the data prepare actor.
  constexpr size_t kOutputControlArrowsNum = 2;
  ofs << "\t\toutput_control_arrows:" << kOutputControlArrowsNum << "\n ";
  ofs << "\t\t\tto_actor_name:" << actor->output_aid().Name() << "\n";
  ofs << "\t\t\tto_actor_name:" << actor->data_prepare_aid().Name() << "\n";
}
// Dump the output actor section. A nullptr actor is reported with a count of 0 and skipped.
void DumpOutputActor(const OutputActorPtr &actor, std::ofstream &ofs) {
  ofs << "\n\n[Output actor:" << (actor != nullptr ? 1 : 0) << "]\n";
  if (actor == nullptr) {
    return;
  }
  ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count()
      << "\toutputs_num:" << actor->outputs_num() << "\n";
  DumpAbstractActor(actor.get(), ofs);

  const auto &result_arrow_aids = actor->input_result_arrow_aids();
  ofs << "\t\tinput_result_arrows:" << result_arrow_aids.size() << "\n ";
  for (const auto &aid : result_arrow_aids) {
    ofs << "\t\t\tfrom_actor_name:" << aid.Name() << "\n";
  }
}
// Dump the data source actors section, one entry per actor.
void DumpDSActors(const std::vector<DataSourceActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Data source actors:" << actors.size() << "]\n";
  for (const auto &ds_actor : actors) {
    DumpDSActor(ds_actor.get(), ofs);
  }
}
// Dump the kernel actors section, one entry per actor.
void DumpKernelActors(const std::vector<KernelActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Kernel actors:" << actors.size() << "]\n";
  for (const auto &k_actor : actors) {
    DumpKernelActor(k_actor.get(), ofs);
  }
}
// Dump the super kernel actors section, one entry per actor.
void DumpSuperKernelActors(const std::vector<SuperKernelActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Super kernel actors:" << actors.size() << "]\n";
  for (const auto &sk_actor : actors) {
    DumpSuperKernelActor(sk_actor.get(), ofs);
  }
}
void DumpNoInputKernelActors(const std::vector<AbstractActorPtr> &actors, std::ofstream &ofs) {
ofs << "\n\n[No input kernel actors:" << actors.size() << "]\n";
for (const auto &actor : actors) {
MS_EXCEPTION_IF_NULL(actor);
if (actor->type() == KernelTransformType::kKernelActor) {
auto kernel_actor = dynamic_cast<const KernelActor *>(actor.get());
MS_EXCEPTION_IF_NULL(kernel_actor);
DumpKernelActor(kernel_actor, ofs);
} else if (actor->type() == KernelTransformType::kSuperKernelActor) {
auto super_kernel_actor = dynamic_cast<const SuperKernelActor *>(actor.get());
MS_EXCEPTION_IF_NULL(super_kernel_actor);
DumpSuperKernelActor(super_kernel_actor, ofs);
}
}
}
// Dump the copy actors section, one entry per actor.
void DumpCopyActors(const std::vector<CopyActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Copy actors:" << actors.size() << "]\n";
  for (const auto &c_actor : actors) {
    DumpCopyActor(c_actor.get(), ofs);
  }
}
// Dump the gather actors section, one entry per actor.
void DumpGatherActors(const std::vector<GatherActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Gather actors:" << actors.size() << "]\n";
  for (const auto &g_actor : actors) {
    DumpGatherActor(g_actor.get(), ofs);
  }
}
// Dump the switch actors section, one entry per actor.
void DumpSwitchActors(const std::vector<SwitchActorPtr> &actors, std::ofstream &ofs) {
  ofs << "\n\n[Switch actors:" << actors.size() << "]\n";
  for (const auto &s_actor : actors) {
    DumpSwitchActor(s_actor.get(), ofs);
  }
}
} // namespace runtime
} // namespace mindspore

View File

@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_DUMP_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_DUMP_H_
#include <vector>
#include <string>
#include <memory>
#include <utility>
#include <fstream>
#include "runtime/framework/actor/abstract_actor.h"
#include "runtime/framework/actor/data_prepare_actor.h"
#include "runtime/framework/actor/data_source_actor.h"
#include "runtime/framework/actor/loop_count_actor.h"
#include "runtime/framework/actor/kernel_actor.h"
#include "runtime/framework/actor/super_kernel_actor.h"
#include "runtime/framework/actor/output_actor.h"
#include "runtime/framework/actor/copy_actor.h"
#include "runtime/framework/actor/control_flow/switch_actor.h"
#include "runtime/framework/actor/control_flow/gather_actor.h"
namespace mindspore {
namespace runtime {
// Dump the single-instance actors. A nullptr actor is reported with a count of 0 and skipped.
void DumpDataPrepareActor(const DataPrepareActorPtr &actor, std::ofstream &ofs);
void DumpLoopCountActor(const LoopCountActorPtr &actor, std::ofstream &ofs);
void DumpOutputActor(const OutputActorPtr &actor, std::ofstream &ofs);
// Dump every actor in the given collection, one dump section per actor kind.
void DumpDSActors(const std::vector<DataSourceActorPtr> &actors, std::ofstream &ofs);
void DumpKernelActors(const std::vector<KernelActorPtr> &actors, std::ofstream &ofs);
void DumpSuperKernelActors(const std::vector<SuperKernelActorPtr> &actors, std::ofstream &ofs);
// No-input actors are dispatched by runtime type to the kernel/super kernel dumpers.
void DumpNoInputKernelActors(const std::vector<AbstractActorPtr> &actors, std::ofstream &ofs);
void DumpCopyActors(const std::vector<CopyActorPtr> &actors, std::ofstream &ofs);
void DumpGatherActors(const std::vector<GatherActorPtr> &actors, std::ofstream &ofs);
void DumpSwitchActors(const std::vector<SwitchActorPtr> &actors, std::ofstream &ofs);
} // namespace runtime
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_DUMP_H_

View File

@ -48,6 +48,8 @@ class CopyActor : public MemoryAwareActor {
// The copy processing after memory alloc finished.
void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;
const DeviceTensorPtr &output() const { return output_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;
void UpdateOutputData(OpData<DeviceTensor> *const output_data, const DataArrow *data_arrow,

View File

@ -61,6 +61,10 @@ class DataPrepareActor : public DebugAwareActor {
void SendMemoryAllocReq(OpContext<DeviceTensor> *const context) override;
void OnMemoryAllocFinish(OpContext<DeviceTensor> *const context) override;
const std::map<std::pair<CNodePtr, DeviceContext *>, std::pair<bool, bool>> &continuous_memory_nodes() const {
return continuous_memory_nodes_;
}
private:
friend class GraphScheduler;

View File

@ -89,6 +89,8 @@ class DeviceQueueDataSourceActor : public DataSourceActor {
void SendDebugReq(OpContext<DeviceTensor> *const context) override;
void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
const CNodePtr &data_kernel() const { return data_kernel_; }
protected:
void FillDataBuffer() override;
void SendRecorderInfo(OpContext<DeviceTensor> *const context) const override;

View File

@ -73,6 +73,8 @@ class KernelActor : public DebugAwareActor {
// The callback after debug finished.
void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
const CNodePtr &kernel() const { return kernel_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;
void SendRecorderInfo(OpContext<DeviceTensor> *const context) const override;

View File

@ -51,6 +51,11 @@ class LoopCountActor : public DebugAwareActor {
// The callback after debug finished.
void OnDebugFinish(OpContext<DeviceTensor> *const context) override;
// Get the member.
size_t loop_count() const { return loop_count_; }
const AID &data_prepare_aid() const { return data_prepare_aid_; }
const AID &output_aid() const { return output_aid_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;
void SendOutput(OpContext<DeviceTensor> *const context) override;

View File

@ -66,6 +66,10 @@ class OutputActor : public AbstractActor {
// context of tensor be rewritten in the next step or next loop.
void UpdateOutputDeviceAddress();
// Get the member.
size_t loop_count() const { return loop_count_; }
size_t outputs_num() const { return outputs_num_; }
const std::vector<AID> &input_result_arrow_aids() const { return input_result_arrow_aids_; }
std::vector<TensorPtr> &outputs() { return outputs_; }
private:

View File

@ -42,6 +42,8 @@ class SuperKernelActor : public DebugAwareActor {
void Init() override;
const KernelGraphPtr &graph() const { return graph_; }
protected:
void Run(OpContext<DeviceTensor> *const context) override;

View File

@ -1479,7 +1479,7 @@ void GraphScheduler::LinkOutputResultArrowForOutputActor(OutputActor *to_actor,
}
auto op_arrow = std::make_shared<DataArrow>(output_with_index.second, to_actor->GetAID(), output_position);
(void)from_actor->output_result_arrows_.emplace_back(op_arrow);
(void)from_actor->output_nodes_.emplace_back(output_with_index.first);
(void)from_actor->output_result_nodes_.emplace_back(output_with_index.first);
(void)to_actor->input_result_arrow_aids_.emplace_back(from_actor->GetAID());
// Update the real compute node in the host data source actor.
@ -1488,7 +1488,7 @@ void GraphScheduler::LinkOutputResultArrowForOutputActor(OutputActor *to_actor,
MS_EXCEPTION_IF_NULL(host_queue_ds_actor);
auto position = host_queue_ds_actor->FetchNodePosition(output_with_index.first);
auto real_node = host_queue_ds_actor->FetchNode(position);
from_actor->output_nodes_[from_actor->output_nodes_.size() - 1] = real_node;
from_actor->output_result_nodes_[from_actor->output_result_nodes_.size() - 1] = real_node;
UpdateRefCount(real_node, output_with_index.second, true);
}
}
@ -1757,277 +1757,22 @@ void GraphScheduler::DumpActor(const ActorSet *actor_set, const GraphCompilerInf
return;
}
ofs << "[Device tensor stores]\n";
DumpDeviceTensorStore(graph_compiler_info, ofs);
const auto &data_prepare_actor = actor_set->data_prepare_actor_;
ofs << "\n\n[Data prepare actor:" << (data_prepare_actor != nullptr ? 1 : 0) << "]\n";
if (data_prepare_actor != nullptr) {
DumpDataPrepareActor(data_prepare_actor.get(), ofs);
}
ofs << "\n\n[Data source actors:" << actor_set->data_source_actors_.size() << "]\n";
for (const auto &data_source_actor : actor_set->data_source_actors_) {
DumpDSActor(data_source_actor.get(), ofs);
}
ofs << "\n\n[Kernel actors:" << actor_set->kernel_actors_.size() << "]\n";
for (const auto &kernel_actor : actor_set->kernel_actors_) {
DumpKernelActor(kernel_actor.get(), ofs);
}
ofs << "\n\n[Super kernel actors:" << actor_set->super_kernel_actors_.size() << "]\n";
for (const auto &super_kernel_actor : actor_set->super_kernel_actors_) {
DumpSuperKernelActor(super_kernel_actor.get(), ofs);
}
ofs << "\n\n[No input kernel actors:" << actor_set->no_input_kernel_actors_.size() << "]\n";
for (const auto &no_input_kernel_actor : actor_set->no_input_kernel_actors_) {
DumpNoInputKernelActor(no_input_kernel_actor.get(), ofs);
}
ofs << "\n\n[Copy actors:" << actor_set->copy_actors_.size() << "]\n";
for (const auto &copy_actor : actor_set->copy_actors_) {
DumpCopyActor(copy_actor.get(), ofs);
}
ofs << "\n\n[Gather actors:" << actor_set->gather_actors_.size() << "]\n";
for (const auto &gather_actor : actor_set->gather_actors_) {
DumpGatherActor(gather_actor.get(), ofs);
}
ofs << "\n\n[Switch actors:" << actor_set->switch_actors_.size() << "]\n";
for (const auto &switch_actor : actor_set->switch_actors_) {
DumpSwitchActor(switch_actor.get(), ofs);
}
const auto &loop_count_actor = actor_set->loop_count_actor_;
ofs << "\n\n[Loop count actor:" << (loop_count_actor != nullptr ? 1 : 0) << "]\n";
if (loop_count_actor != nullptr) {
DumpLoopCountActor(loop_count_actor.get(), ofs);
}
const auto &output_actor = actor_set->output_actor_;
ofs << "\n\n[Output actor:" << (output_actor != nullptr ? 1 : 0) << "]\n";
if (output_actor != nullptr) {
DumpOutputActor(output_actor.get(), ofs);
}
}
void GraphScheduler::DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
if (actor->device_contexts_.size() > 0) {
ofs << "\t\tdevice_contexts:" << actor->device_contexts_.size() << "\n ";
for (const auto &device_context : actor->device_contexts_) {
if (device_context == nullptr) {
ofs << "\t\t\tdevice_context:" << device_context << "\n";
continue;
}
ofs << "\t\t\tdevice_context:" << device_context->device_context_key().ToString() << "\n";
}
}
if (actor->device_tensor_store_keys_.size() > 0) {
ofs << "\t\tdevice_tensor_store_keys:" << actor->device_tensor_store_keys_.size() << "\n ";
for (const auto &device_tensor_store_key : actor->device_tensor_store_keys_) {
MS_EXCEPTION_IF_NULL(device_tensor_store_key.second);
ofs << "\t\t\tto_input_index:" << device_tensor_store_key.first
<< "\tfrom_node_name:" << device_tensor_store_key.second->fullname_with_scope() << "\n";
}
}
if (actor->input_data_arrow_aids_.size() > 0) {
ofs << "\t\tinput_data_arrow_actors:" << actor->input_data_arrow_aids_.size() << "\n ";
for (const auto &input_data_arrow_aid : actor->input_data_arrow_aids_) {
ofs << "\t\t\tfrom_actor_name:" << input_data_arrow_aid.Name() << "\n";
}
}
if (actor->input_control_arrow_aids_.size() > 0) {
ofs << "\t\tinput_control_arrow_actors:" << actor->input_control_arrow_aids_.size() << "\n ";
for (const auto &input_control_arrow_aid : actor->input_control_arrow_aids_) {
ofs << "\t\t\tfrom_actor_name:" << input_control_arrow_aid.Name() << "\n";
}
}
const auto &output_data_arrows = actor->output_data_arrows();
if (output_data_arrows.size() > 0) {
ofs << "\t\toutput_data_arrows:" << output_data_arrows.size() << "\n ";
for (const auto &data_arrow : output_data_arrows) {
MS_EXCEPTION_IF_NULL(data_arrow);
ofs << "\t\t\tfrom_output_index:" << data_arrow->from_output_index_
<< "\tto_actor_name:" << data_arrow->to_op_id_.Name() << "\tto_input_index:" << data_arrow->to_input_index_
<< "\n";
}
}
const auto &output_control_arrows = actor->output_control_arrows();
if (output_control_arrows.size() > 0) {
ofs << "\t\toutput_control_arrows:" << output_control_arrows.size() << "\n ";
for (const auto &aid : output_control_arrows) {
ofs << "\t\t\tto_actor_name:" << aid.Name() << "\n";
}
}
if (actor->output_result_arrows_.size() != actor->output_nodes_.size()) {
MS_LOG(EXCEPTION) << "The size of output result arrows is not equal to the output nodes.";
}
if (actor->output_result_arrows_.size() > 0) {
ofs << "\t\toutput_result_arrows:" << actor->output_result_arrows_.size() << "\n ";
for (size_t i = 0; i < actor->output_result_arrows_.size(); ++i) {
auto result_arrow = actor->output_result_arrows_[i];
auto output_node = actor->output_nodes_[i];
MS_EXCEPTION_IF_NULL(result_arrow);
MS_EXCEPTION_IF_NULL(output_node);
ofs << "\t\t\tfrom_output_node:" << output_node->fullname_with_scope()
<< "\tfrom_output_index:" << result_arrow->from_output_index_
<< "\tto_actor_name:" << result_arrow->to_op_id_.Name()
<< "\toutput_node_position:" << result_arrow->to_input_index_ << "\n";
}
}
}
void GraphScheduler::DumpDataPrepareActor(const DataPrepareActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
DumpAbstractActor(actor, ofs);
ofs << "\t\tcontinuous_memory_nodes:" << actor->continuous_memory_nodes_.size() << "\n ";
for (const auto &iter : actor->continuous_memory_nodes_) {
MS_EXCEPTION_IF_NULL(iter.first.first);
MS_EXCEPTION_IF_NULL(iter.first.second);
ofs << "\t\t\tnode_name:" << iter.first.first->fullname_with_scope()
<< "\tdevice_context:" << iter.first.second->device_context_key().ToString()
<< "\tis_input_need:" << iter.second.first << "\tis_output_need:" << iter.second.second << "\n";
}
}
void GraphScheduler::DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
const auto &actor_name = actor->GetAID().Name();
ofs << "\tactor_name:" << actor_name << "\n";
if (actor->type_ == KernelTransformType::kDeviceDataSourceActor) {
// Dump the member info of device queue data source actor.
const auto &device_queue_ds_actor = dynamic_cast<const DeviceQueueDataSourceActor *>(actor);
MS_EXCEPTION_IF_NULL(device_queue_ds_actor);
const auto &data_kernel = device_queue_ds_actor->data_kernel_;
MS_EXCEPTION_IF_NULL(data_kernel);
ofs << "\t\tdata_kernel_name:" << data_kernel->fullname_with_scope()
<< "\tinput_number:" << AnfAlgo::GetInputTensorNum(data_kernel)
<< "\toutput_number:" << AnfAlgo::GetOutputTensorNum(data_kernel) << "\n";
for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(data_kernel); ++i) {
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_kernel, i, false);
MS_EXCEPTION_IF_NULL(device_tensor);
ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
}
} else if (actor->type_ == KernelTransformType::kHostDataSourceActor) {
// Dump the member info of host queue data source actor.
const auto &host_queue_ds_actor = dynamic_cast<const HostQueueDataSourceActor *>(actor);
MS_EXCEPTION_IF_NULL(host_queue_ds_actor);
ofs << "\t\tdata_nodes:" << host_queue_ds_actor->data_nodes_.size() << "\n";
for (size_t i = 0; i < host_queue_ds_actor->data_nodes_.size(); ++i) {
const auto &data_node = host_queue_ds_actor->data_nodes_[i];
MS_EXCEPTION_IF_NULL(data_node);
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(data_node, 0, false);
MS_EXCEPTION_IF_NULL(device_tensor);
ofs << "\t\t\tnode_order_number:" << i << "\tnode_name:" << data_node->fullname_with_scope()
<< "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n";
}
}
DumpAbstractActor(actor, ofs);
ofs << "\n";
}
void GraphScheduler::DumpLoopCountActor(const LoopCountActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_ << "\n";
DumpAbstractActor(actor, ofs);
const size_t kOutputControlArrowsNum = 2;
ofs << "\t\toutput_control_arrows:" << kOutputControlArrowsNum << "\n ";
ofs << "\t\t\tto_actor_name:" << actor->output_aid_.Name() << "\n";
ofs << "\t\t\tto_actor_name:" << actor->data_prepare_aid_.Name() << "\n";
}
void GraphScheduler::DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
const auto &kernel = actor->kernel_;
MS_EXCEPTION_IF_NULL(kernel);
ofs << "\t\tkernel_name:" << kernel->fullname_with_scope() << "\tinputs_num:" << AnfAlgo::GetInputTensorNum(kernel)
<< "\toutputs_num:" << AnfAlgo::GetOutputTensorNum(kernel) << "\n";
for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(kernel); ++i) {
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(kernel, i, false);
MS_EXCEPTION_IF_NULL(device_tensor);
ofs << "\t\t\toutput_index:" << i << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
}
DumpAbstractActor(actor, ofs);
ofs << "\n";
}
void GraphScheduler::DumpSuperKernelActor(const SuperKernelActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
const auto &graph = actor->graph_;
MS_EXCEPTION_IF_NULL(graph);
ofs << "\t\tgraph_id:" << graph->graph_id() << "\tgraphl_name:" << graph->ToString()
<< "\tis_sink:" << graph->is_sink() << "\tinputs_num:" << (graph->input_nodes()).size()
<< "\tkernels_num:" << (graph->execution_order()).size() << "\n";
DumpAbstractActor(actor, ofs);
ofs << "\n";
}
void GraphScheduler::DumpNoInputKernelActor(const AbstractActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
if (actor->type_ == KernelTransformType::kKernelActor) {
auto kernel_actor = dynamic_cast<const KernelActor *>(actor);
MS_EXCEPTION_IF_NULL(kernel_actor);
DumpKernelActor(kernel_actor, ofs);
} else if (actor->type_ == KernelTransformType::kSuperKernelActor) {
auto super_kernel_actor = dynamic_cast<const SuperKernelActor *>(actor);
MS_EXCEPTION_IF_NULL(super_kernel_actor);
DumpSuperKernelActor(super_kernel_actor, ofs);
}
}
void GraphScheduler::DumpOutputActor(const OutputActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\tloop_count:" << actor->loop_count_
<< "\toutputs_num:" << actor->outputs_num_ << "\n";
DumpAbstractActor(actor, ofs);
ofs << "\t\tinput_result_arrows:" << actor->input_result_arrow_aids_.size() << "\n ";
for (const auto &input_result_arrow_aid : actor->input_result_arrow_aids_) {
ofs << "\t\t\tfrom_actor_name:" << input_result_arrow_aid.Name() << "\n";
}
}
void GraphScheduler::DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) const {
MS_EXCEPTION_IF_NULL(actor);
ofs << "\tactor_name:" << actor->GetAID().Name() << "\n";
auto device_tensor = actor->output_;
if (device_tensor != nullptr) {
ofs << "\t\toutput_index:" << 0 << "\tptr:" << device_tensor->GetPtr() << "\tsize:" << device_tensor->GetSize()
<< "\toriginal_ref_count:" << device_tensor->original_ref_count() << "\n ";
}
DumpAbstractActor(actor, ofs);
ofs << "\n";
DumpDataPrepareActor(actor_set->data_prepare_actor_, ofs);
DumpDSActors(actor_set->data_source_actors_, ofs);
DumpKernelActors(actor_set->kernel_actors_, ofs);
DumpSuperKernelActors(actor_set->super_kernel_actors_, ofs);
DumpNoInputKernelActors(actor_set->no_input_kernel_actors_, ofs);
DumpCopyActors(actor_set->copy_actors_, ofs);
DumpGatherActors(actor_set->gather_actors_, ofs);
DumpSwitchActors(actor_set->switch_actors_, ofs);
DumpLoopCountActor(actor_set->loop_count_actor_, ofs);
DumpOutputActor(actor_set->output_actor_, ofs);
}
void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const {
ofs << "[Device tensor stores]\n";
for (const auto &graph : graph_compiler_info.graphs_) {
MS_EXCEPTION_IF_NULL(graph);
ofs << "\tgraph_id:" << graph->graph_id() << "\tis_sink:" << graph->is_sink()
@ -2076,15 +1821,5 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil
ofs << "\n";
}
}
// Dump the debug information of a gather actor; only its name is recorded.
void GraphScheduler::DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) const {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &actor_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << actor_name << '\n';
}
// Dump the debug information of a switch actor; only its name is recorded.
void GraphScheduler::DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) const {
  MS_EXCEPTION_IF_NULL(actor);
  const auto &actor_name = actor->GetAID().Name();
  ofs << "\tactor_name:" << actor_name << '\n';
}
} // namespace runtime
} // namespace mindspore

View File

@ -28,15 +28,7 @@
#include <algorithm>
#include <fstream>
#include "runtime/framework/graph_compiler.h"
#include "runtime/framework/actor/data_prepare_actor.h"
#include "runtime/framework/actor/data_source_actor.h"
#include "runtime/framework/actor/loop_count_actor.h"
#include "runtime/framework/actor/kernel_actor.h"
#include "runtime/framework/actor/super_kernel_actor.h"
#include "runtime/framework/actor/output_actor.h"
#include "runtime/framework/actor/copy_actor.h"
#include "runtime/framework/actor/control_flow/switch_actor.h"
#include "runtime/framework/actor/control_flow/gather_actor.h"
#include "runtime/framework/actor/actor_dump.h"
#include "thread/actor_threadpool.h"
namespace mindspore {
@ -223,17 +215,6 @@ class GraphScheduler {
// Display the actor information of corresponding kernel graph.
void DumpActor(const ActorSet *actor_set, const GraphCompilerInfo &graph_compiler_info) const;
void DumpAbstractActor(const AbstractActor *actor, std::ofstream &ofs) const;
void DumpDataPrepareActor(const DataPrepareActor *actor, std::ofstream &ofs) const;
void DumpDSActor(const DataSourceActor *actor, std::ofstream &ofs) const;
void DumpLoopCountActor(const LoopCountActor *actor, std::ofstream &ofs) const;
void DumpKernelActor(const KernelActor *actor, std::ofstream &ofs) const;
void DumpSuperKernelActor(const SuperKernelActor *actor, std::ofstream &ofs) const;
void DumpNoInputKernelActor(const AbstractActor *actor, std::ofstream &ofs) const;
void DumpOutputActor(const OutputActor *actor, std::ofstream &ofs) const;
void DumpCopyActor(const CopyActor *actor, std::ofstream &ofs) const;
void DumpGatherActor(const GatherActor *actor, std::ofstream &ofs) const;
void DumpSwitchActor(const SwitchActor *actor, std::ofstream &ofs) const;
void DumpDeviceTensorStore(const GraphCompilerInfo &graph_compiler_info, std::ofstream &ofs) const;
// The global maps, only be cleared in the deconstruction.

View File

@ -365,6 +365,7 @@ MindRTBackend::MindRTBackend(const std::string &backend_name, const std::string
const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(graph_compiler_);
MS_EXCEPTION_IF_NULL(func_graph);
MS_LOG(INFO) << "Status record: start compile function graph: " << func_graph->ToString();
auto root_graph = WrapPrimitives(func_graph);
MS_EXCEPTION_IF_NULL(root_graph);
root_graph_ = root_graph.get();
@ -403,6 +404,8 @@ const ActorInfo &MindRTBackend::CompileGraphs(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(graph_compiler_info);
const ActorInfo &actor_info = graph_compiler_info->name_;
(void)actor_to_graph_compiler_info_.emplace(graph_compiler_info->name_, std::move(graph_compiler_info));
MS_LOG(INFO) << "Status record: end compile function graph: " << func_graph->ToString()
<< ", produce actor: " << actor_info;
return actor_info;
}
@ -787,7 +790,6 @@ void MindRTBackend::RunGraphBySingleOp(const std::vector<KernelGraphPtr> &graphs
}
void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args, VectorRef *outputs) {
MS_LOG(INFO) << "Run actor begin, actor name: " << actor_info;
MS_EXCEPTION_IF_NULL(root_graph_);
if (IsGraphOutputValueNodeOrParameter(root_graph_->output(), args, outputs)) {
return;
@ -800,6 +802,7 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
return;
}
MS_LOG(INFO) << "Status record: start run actor: " << actor_info;
// Fetch the graph compiler info.
const auto &graph_iter = actor_to_graph_compiler_info_.find(actor_info);
if (graph_iter == actor_to_graph_compiler_info_.end()) {
@ -837,6 +840,7 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
// There will be more than one kernel graph in heterogeneous scenario in a ms function of PyNative Mode.
if (real_execution_mode_ == kPynativeMode) {
RunGraphBySingleOp(graph_compiler_info.graphs_, input_tensors, outputs);
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
return;
}
// Run actor DAG.
@ -875,7 +879,7 @@ void MindRTBackend::RunGraph(const ActorInfo &actor_info, const VectorRef &args,
// Update device address for output node of graph.
actor_set->output_actor_->UpdateOutputDeviceAddress();
MS_LOG(INFO) << "Run actor end, actor name: " << actor_info;
MS_LOG(INFO) << "Status record: end run actor: " << actor_info;
}
void MindRTBackend::ConstructOutputs(const AnfNodePtr &output_node,