forked from mindspore-Ecosystem/mindspore
!33731 runtime optimize the actor common code
Merge pull request !33731 from limingqi107/bug_fix4
This commit is contained in:
commit
50a6735358
|
@ -977,6 +977,16 @@ std::vector<size_t> AnfRuntimeAlgorithm::GetOutputDeviceShapeAdaptively(const An
|
||||||
return device_shape;
|
return device_shape;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look up the graph that `node` belongs to and hand it back as a KernelGraphPtr.
// Returns nullptr when the node reports no func graph; otherwise returns whatever the cast of that func graph to
// KernelGraphPtr produces.
KernelGraphPtr AnfRuntimeAlgorithm::FetchKernelGraph(const AnfNodePtr &node) {
  MS_EXCEPTION_IF_NULL(node);
  const auto &owner_graph = node->func_graph();
  // A node that is not attached to any func graph has no kernel graph to report.
  if (owner_graph == nullptr) {
    return nullptr;
  }
  return owner_graph->cast<KernelGraphPtr>();
}
|
||||||
|
|
||||||
AnfNodePtr AnfRuntimeAlgorithm::FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph) {
|
AnfNodePtr AnfRuntimeAlgorithm::FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph) {
|
||||||
MS_EXCEPTION_IF_NULL(backend_node);
|
MS_EXCEPTION_IF_NULL(backend_node);
|
||||||
auto front_node_with_index = graph.GetFrontNodeByInternalParameter(backend_node);
|
auto front_node_with_index = graph.GetFrontNodeByInternalParameter(backend_node);
|
||||||
|
@ -1253,5 +1263,30 @@ bool AnfRuntimeAlgorithm::IsNeedUpdateShapeAndTypeAfterLaunch(const AnfNodePtr &
|
||||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||||
return kernel_mod->IsNeedRetrieveOutputShape();
|
return kernel_mod->IsNeedRetrieveOutputShape();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bring the recorded size of every output address in `kernel_info` in line with the output tensor memory size
// currently reported for `kernel`.
// kernel_info: holder of the output address list to refresh; must not be null.
// kernel: the node whose output tensor memory sizes are queried, one query per output index.
void AnfRuntimeAlgorithm::UpdateOutputAddrSize(device::KernelInfo *kernel_info, const CNodePtr &kernel) {
  MS_EXCEPTION_IF_NULL(kernel_info);
  const auto &addr_list = kernel_info->output_address_list();
  const size_t addr_num = addr_list.size();
  for (size_t index = 0; index < addr_num; ++index) {
    auto *addr = addr_list[index].get();
    MS_EXCEPTION_IF_NULL(addr);
    const auto expected_size = AnfAlgo::GetOutputTensorMemSize(kernel, index);
    // Only touch the address when the size actually differs.
    if (expected_size == addr->GetSize()) {
      continue;
    }
    addr->SetSize(expected_size);
  }
}
|
||||||
|
|
||||||
|
void AnfRuntimeAlgorithm::UpdateInternalParameterShape(const std::map<size_t, AnfNodeWeakPtr> &internal_parameters,
|
||||||
|
const CNodePtr &cnode) {
|
||||||
|
MS_EXCEPTION_IF_NULL(cnode);
|
||||||
|
for (auto &internal_parameter_iter : internal_parameters) {
|
||||||
|
auto internal_parameter = internal_parameter_iter.second.lock();
|
||||||
|
MS_EXCEPTION_IF_NULL(internal_parameter);
|
||||||
|
common::AnfAlgo::SetOutputInferTypeAndShape(
|
||||||
|
{common::AnfAlgo::GetOutputInferDataType(cnode, internal_parameter_iter.first)},
|
||||||
|
{common::AnfAlgo::GetOutputInferShape(cnode, internal_parameter_iter.first)}, internal_parameter.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace session
|
} // namespace session
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -162,6 +162,7 @@ class BACKEND_EXPORT AnfRuntimeAlgorithm {
|
||||||
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
|
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
|
||||||
static std::vector<size_t> GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
|
static std::vector<size_t> GetInputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
|
||||||
static std::vector<size_t> GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
|
static std::vector<size_t> GetOutputDeviceShapeAdaptively(const AnfNodePtr &anf_node, size_t index);
|
||||||
|
static KernelGraphPtr FetchKernelGraph(const AnfNodePtr &node);
|
||||||
static AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph);
|
static AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraph &graph);
|
||||||
static void InsertMakeTupleForOutput(const NotNull<KernelGraphPtr> &root_graph);
|
static void InsertMakeTupleForOutput(const NotNull<KernelGraphPtr> &root_graph);
|
||||||
// Save inputs/outputs/workspace address in kernel_mod.
|
// Save inputs/outputs/workspace address in kernel_mod.
|
||||||
|
@ -173,6 +174,12 @@ class BACKEND_EXPORT AnfRuntimeAlgorithm {
|
||||||
static bool IsDynamicShapeSkipExecute(const CNodePtr &cnode);
|
static bool IsDynamicShapeSkipExecute(const CNodePtr &cnode);
|
||||||
// return true if need to update output's shape and type after launch
|
// return true if need to update output's shape and type after launch
|
||||||
static bool IsNeedUpdateShapeAndTypeAfterLaunch(const AnfNodePtr &cnode);
|
static bool IsNeedUpdateShapeAndTypeAfterLaunch(const AnfNodePtr &cnode);
|
||||||
|
// The size of the output address may change in dynamic shape scenarios. For example, the output shape of operator
|
||||||
|
// 'Unique' changes after Launch, so the output address size must be updated.
|
||||||
|
static void UpdateOutputAddrSize(device::KernelInfo *kernel_info, const CNodePtr &kernel);
|
||||||
|
// Update the shape of internal parameter in the sub graph.
|
||||||
|
static void UpdateInternalParameterShape(const std::map<size_t, AnfNodeWeakPtr> &internal_parameters,
|
||||||
|
const CNodePtr &cnode);
|
||||||
};
|
};
|
||||||
} // namespace session
|
} // namespace session
|
||||||
using AnfAlgo = session::AnfRuntimeAlgorithm;
|
using AnfAlgo = session::AnfRuntimeAlgorithm;
|
||||||
|
|
|
@ -225,6 +225,13 @@ class COMMON_EXPORT AnfAlgo {
|
||||||
return attr_dup != nullptr && GetValue<bool>(attr_dup);
|
return attr_dup != nullptr && GetValue<bool>(attr_dup);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check whether the node has Ref abstract.
|
||||||
|
static inline bool HasAbstractRef(const AnfNodePtr &node) {
|
||||||
|
MS_EXCEPTION_IF_NULL(node);
|
||||||
|
auto &abs = node->abstract();
|
||||||
|
return (abs != nullptr) && abs->isa<abstract::AbstractRef>();
|
||||||
|
}
|
||||||
|
|
||||||
// Get the real output node and indexes of get item, make tuple, depend, load.
|
// Get the real output node and indexes of get item, make tuple, depend, load.
|
||||||
static AnfNodePtr GetTupleIndexes(const AnfNodePtr &node, std::vector<size_t> *index_stack);
|
static AnfNodePtr GetTupleIndexes(const AnfNodePtr &node, std::vector<size_t> *index_stack);
|
||||||
static bool IsNopNode(const AnfNodePtr &node);
|
static bool IsNopNode(const AnfNodePtr &node);
|
||||||
|
|
|
@ -236,41 +236,13 @@ void FreeMemoryByRefCount(DeviceTensor *const device_tensor, const DeviceContext
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraphPtr &graph) {
|
|
||||||
MS_EXCEPTION_IF_NULL(backend_node);
|
|
||||||
MS_EXCEPTION_IF_NULL(graph);
|
|
||||||
auto front_node = AnfAlgo::FetchFrontNodeByBackendNode(backend_node, *graph);
|
|
||||||
return front_node;
|
|
||||||
}
|
|
||||||
|
|
||||||
KernelWithIndex FetchFrontNodeWithIndexByGraphOutput(const KernelWithIndex &output_with_index,
|
|
||||||
const KernelGraphPtr &graph) {
|
|
||||||
MS_EXCEPTION_IF_NULL(graph);
|
|
||||||
auto front_node_with_index = graph->GetFrontNodeWithIndexByGraphOutput(output_with_index);
|
|
||||||
// PyNative forward graph does not has front node, using backend node instead.
|
|
||||||
if (front_node_with_index.first == nullptr) {
|
|
||||||
front_node_with_index = output_with_index;
|
|
||||||
}
|
|
||||||
return front_node_with_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
KernelGraphPtr FetchKernelGraph(const AnfNodePtr &node) {
|
|
||||||
MS_EXCEPTION_IF_NULL(node);
|
|
||||||
const auto &func_graph = node->func_graph();
|
|
||||||
if (func_graph == nullptr) {
|
|
||||||
return nullptr;
|
|
||||||
} else {
|
|
||||||
return func_graph->cast<KernelGraphPtr>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KernelTransformType FetchKernelTransformType(const AnfNodePtr &node, const KernelGraphPtr &graph,
|
KernelTransformType FetchKernelTransformType(const AnfNodePtr &node, const KernelGraphPtr &graph,
|
||||||
const std::vector<AnfNodePtr> &host_parameters,
|
const std::vector<AnfNodePtr> &host_parameters,
|
||||||
GraphExecutionStrategy strategy) {
|
GraphExecutionStrategy strategy) {
|
||||||
// Fetch kernel graph.
|
// Fetch kernel graph.
|
||||||
KernelGraphPtr kernel_graph = nullptr;
|
KernelGraphPtr kernel_graph = nullptr;
|
||||||
if (graph == nullptr) {
|
if (graph == nullptr) {
|
||||||
kernel_graph = FetchKernelGraph(node);
|
kernel_graph = AnfAlgo::FetchKernelGraph(node);
|
||||||
} else {
|
} else {
|
||||||
kernel_graph = graph;
|
kernel_graph = graph;
|
||||||
}
|
}
|
||||||
|
@ -312,7 +284,7 @@ std::string FetchActorName(KernelTransformType kernel_type, const std::string &a
|
||||||
// Fetch kernel graph.
|
// Fetch kernel graph.
|
||||||
KernelGraphPtr kernel_graph = nullptr;
|
KernelGraphPtr kernel_graph = nullptr;
|
||||||
if (graph == nullptr) {
|
if (graph == nullptr) {
|
||||||
kernel_graph = FetchKernelGraph(node);
|
kernel_graph = AnfAlgo::FetchKernelGraph(node);
|
||||||
} else {
|
} else {
|
||||||
kernel_graph = graph;
|
kernel_graph = graph;
|
||||||
}
|
}
|
||||||
|
@ -345,14 +317,6 @@ std::string FetchActorName(KernelTransformType kernel_type, const std::string &a
|
||||||
return actor_name;
|
return actor_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HasAbstractRef(const AnfNodePtr &node) {
|
|
||||||
if (node == nullptr) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
auto &abs = node->abstract();
|
|
||||||
return (abs != nullptr) && abs->isa<abstract::AbstractRef>();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::set<size_t> FetchModifiableRefInputIndex(const CNodePtr &cnode) {
|
std::set<size_t> FetchModifiableRefInputIndex(const CNodePtr &cnode) {
|
||||||
MS_EXCEPTION_IF_NULL(cnode);
|
MS_EXCEPTION_IF_NULL(cnode);
|
||||||
|
|
||||||
|
@ -363,7 +327,7 @@ std::set<size_t> FetchModifiableRefInputIndex(const CNodePtr &cnode) {
|
||||||
if (HasAbstractMonad(input)) {
|
if (HasAbstractMonad(input)) {
|
||||||
has_monad = true;
|
has_monad = true;
|
||||||
}
|
}
|
||||||
if (HasAbstractRef(input)) {
|
if (common::AnfAlgo::HasAbstractRef(input)) {
|
||||||
(void)ref_input_indexes.insert(i - 1);
|
(void)ref_input_indexes.insert(i - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -390,34 +354,11 @@ std::set<size_t> FetchModifiableRefOutputIndex(const CNodePtr &cnode, const Kern
|
||||||
}
|
}
|
||||||
auto input_pair = graph->GetRefCorrespondOutput(output_pair);
|
auto input_pair = graph->GetRefCorrespondOutput(output_pair);
|
||||||
MS_EXCEPTION_IF_NULL(input_pair.first);
|
MS_EXCEPTION_IF_NULL(input_pair.first);
|
||||||
if (HasAbstractRef(input_pair.first)) {
|
if (common::AnfAlgo::HasAbstractRef(input_pair.first)) {
|
||||||
(void)ref_output_indexes.insert(i);
|
(void)ref_output_indexes.insert(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ref_output_indexes;
|
return ref_output_indexes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateOutputAddrSize(KernelInfo *kernel_info, const CNodePtr &kernel) {
|
|
||||||
auto &output_addresses = kernel_info->output_address_list();
|
|
||||||
for (size_t i = 0; i < output_addresses.size(); ++i) {
|
|
||||||
auto output_address = output_addresses[i].get();
|
|
||||||
MS_EXCEPTION_IF_NULL(output_address);
|
|
||||||
auto output_addr_size = AnfAlgo::GetOutputTensorMemSize(kernel, i);
|
|
||||||
if (output_addr_size != output_address->GetSize()) {
|
|
||||||
output_address->SetSize(output_addr_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UpdateInternalParameterShape(const std::map<size_t, AnfNodeWeakPtr> &internal_parameters, const CNodePtr &cnode) {
|
|
||||||
MS_EXCEPTION_IF_NULL(cnode);
|
|
||||||
for (auto &internal_parameter_iter : internal_parameters) {
|
|
||||||
auto internal_parameter = internal_parameter_iter.second.lock();
|
|
||||||
MS_EXCEPTION_IF_NULL(internal_parameter);
|
|
||||||
common::AnfAlgo::SetOutputInferTypeAndShape(
|
|
||||||
{common::AnfAlgo::GetOutputInferDataType(cnode, internal_parameter_iter.first)},
|
|
||||||
{common::AnfAlgo::GetOutputInferShape(cnode, internal_parameter_iter.first)}, internal_parameter.get());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace runtime
|
} // namespace runtime
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -233,31 +233,16 @@ void FreeMemoryByRefCount(DeviceTensor *const device_tensor, const DeviceContext
|
||||||
const std::string &op_name);
|
const std::string &op_name);
|
||||||
void FreeMemory(DeviceTensor *const device_tensor, const DeviceContext *device_context);
|
void FreeMemory(DeviceTensor *const device_tensor, const DeviceContext *device_context);
|
||||||
|
|
||||||
// Get front node by backend node.
|
|
||||||
AnfNodePtr FetchFrontNodeByBackendNode(const AnfNodePtr &backend_node, const KernelGraphPtr &graph);
|
|
||||||
KernelWithIndex FetchFrontNodeWithIndexByGraphOutput(const KernelWithIndex &output_with_index,
|
|
||||||
const KernelGraphPtr &graph);
|
|
||||||
|
|
||||||
KernelGraphPtr FetchKernelGraph(const AnfNodePtr &node);
|
|
||||||
KernelTransformType FetchKernelTransformType(const AnfNodePtr &node, const KernelGraphPtr &graph,
|
KernelTransformType FetchKernelTransformType(const AnfNodePtr &node, const KernelGraphPtr &graph,
|
||||||
const std::vector<AnfNodePtr> &host_parameters = {},
|
const std::vector<AnfNodePtr> &host_parameters = {},
|
||||||
GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);
|
GraphExecutionStrategy strategy = GraphExecutionStrategy::kPipeline);
|
||||||
std::string FetchActorName(KernelTransformType kernel_type, const std::string &actor_set_name,
|
std::string FetchActorName(KernelTransformType kernel_type, const std::string &actor_set_name,
|
||||||
const AnfNodePtr &node = nullptr, const KernelGraphPtr &graph = nullptr);
|
const AnfNodePtr &node = nullptr, const KernelGraphPtr &graph = nullptr);
|
||||||
|
|
||||||
// Check whether the parameter is a ref parameter.
|
|
||||||
bool HasAbstractRef(const AnfNodePtr &node);
|
|
||||||
|
|
||||||
// Fetch the input indexes which may be modified that exist in the input ref parameter.
|
// Fetch the input indexes which may be modified that exist in the input ref parameter.
|
||||||
std::set<size_t> FetchModifiableRefInputIndex(const CNodePtr &node);
|
std::set<size_t> FetchModifiableRefInputIndex(const CNodePtr &node);
|
||||||
// Fetch the output indexes which may be modified that exist in the ref node.
|
// Fetch the output indexes which may be modified that exist in the ref node.
|
||||||
std::set<size_t> FetchModifiableRefOutputIndex(const CNodePtr &node, const KernelGraphPtr &graph);
|
std::set<size_t> FetchModifiableRefOutputIndex(const CNodePtr &node, const KernelGraphPtr &graph);
|
||||||
|
|
||||||
// The size of output address may be changed in dynamic shape scenario, for example, the output shape of operator
|
|
||||||
// 'Unique' will change after Launch, the output address size should update.
|
|
||||||
void UpdateOutputAddrSize(KernelInfo *kernel_info, const CNodePtr &kernel);
|
|
||||||
// Update the shape of internal parameter.
|
|
||||||
void UpdateInternalParameterShape(const std::map<size_t, AnfNodeWeakPtr> &internal_parameters, const CNodePtr &cnode);
|
|
||||||
} // namespace runtime
|
} // namespace runtime
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
||||||
|
|
|
@ -166,7 +166,7 @@ void ExitActor::CopyDeviceAddress(OpContext<DeviceTensor> *const context) {
|
||||||
|
|
||||||
const KernelWithIndex &node_with_index = input_device_tensor->GetNodeIndex();
|
const KernelWithIndex &node_with_index = input_device_tensor->GetNodeIndex();
|
||||||
MS_EXCEPTION_IF_NULL(node_with_index.first);
|
MS_EXCEPTION_IF_NULL(node_with_index.first);
|
||||||
if (HasAbstractRef(node_with_index.first)) {
|
if (common::AnfAlgo::HasAbstractRef(node_with_index.first)) {
|
||||||
(void)new_device_tensors.emplace_back(input_device_tensor);
|
(void)new_device_tensors.emplace_back(input_device_tensor);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,9 +38,9 @@ void CustomActor::Run(OpContext<DeviceTensor> *const ctx) {
|
||||||
if (AnfUtils::GetCustomActorType(kernel_.lock()) == kInfer) {
|
if (AnfUtils::GetCustomActorType(kernel_.lock()) == kInfer) {
|
||||||
auto base_node = AnfUtils::GetCustomActorBaseNode(kernel_.lock());
|
auto base_node = AnfUtils::GetCustomActorBaseNode(kernel_.lock());
|
||||||
auto kernel_info = dynamic_cast<KernelInfo *>(base_node->kernel_info());
|
auto kernel_info = dynamic_cast<KernelInfo *>(base_node->kernel_info());
|
||||||
UpdateOutputAddrSize(kernel_info, base_node);
|
AnfAlgo::UpdateOutputAddrSize(kernel_info, base_node);
|
||||||
// Update the shape of internal parameter.
|
// Update the shape of internal parameter.
|
||||||
UpdateInternalParameterShape(internal_parameters_, base_node);
|
AnfAlgo::UpdateInternalParameterShape(internal_parameters_, base_node);
|
||||||
}
|
}
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
if (strategy_ == GraphExecutionStrategy::kPipeline) {
|
if (strategy_ == GraphExecutionStrategy::kPipeline) {
|
||||||
|
|
|
@ -354,7 +354,7 @@ void DataPrepareActor::PrepareDataForDeviceTensorStore(const std::vector<std::ve
|
||||||
// Prepare the data of device tensor store(value nodes of graph).
|
// Prepare the data of device tensor store(value nodes of graph).
|
||||||
for (const auto &value_node : graph->graph_value_nodes()) {
|
for (const auto &value_node : graph->graph_value_nodes()) {
|
||||||
if (AnfAlgo::OutputAddrExist(value_node, 0)) {
|
if (AnfAlgo::OutputAddrExist(value_node, 0)) {
|
||||||
const auto &front_node = FetchFrontNodeByBackendNode(value_node, graph);
|
const auto &front_node = AnfAlgo::FetchFrontNodeByBackendNode(value_node, *graph);
|
||||||
PrepareDataForValueNode(value_node, front_node, device_context, context);
|
PrepareDataForValueNode(value_node, front_node, device_context, context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -366,7 +366,7 @@ void DataPrepareActor::PrepareDataForDeviceTensorStore(const std::vector<std::ve
|
||||||
const auto &input_node = input_nodes[j];
|
const auto &input_node = input_nodes[j];
|
||||||
const auto &input_tensor = tensors[j];
|
const auto &input_tensor = tensors[j];
|
||||||
MS_EXCEPTION_IF_NULL(input_node);
|
MS_EXCEPTION_IF_NULL(input_node);
|
||||||
const auto &front_node = FetchFrontNodeByBackendNode(input_node, graph);
|
const auto &front_node = AnfAlgo::FetchFrontNodeByBackendNode(input_node, *graph);
|
||||||
if (IsPersistentDeviceTensor(input_node) && parser->IsRootGraphPersistentDeviceTensor(front_node)) {
|
if (IsPersistentDeviceTensor(input_node) && parser->IsRootGraphPersistentDeviceTensor(front_node)) {
|
||||||
PrepareDataForWeightNode(input_node, front_node, input_tensor, device_context, context);
|
PrepareDataForWeightNode(input_node, front_node, input_tensor, device_context, context);
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,8 +167,8 @@ void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext<DeviceTensor> *co
|
||||||
|
|
||||||
if (common::AnfAlgo::IsDynamicShape(data_kernel_)) {
|
if (common::AnfAlgo::IsDynamicShape(data_kernel_)) {
|
||||||
kernel::UpdateNodeShape(data_kernel_);
|
kernel::UpdateNodeShape(data_kernel_);
|
||||||
UpdateOutputAddrSize(kernel_info_, data_kernel_);
|
AnfAlgo::UpdateOutputAddrSize(kernel_info_, data_kernel_);
|
||||||
UpdateInternalParameterShape(internal_parameters_, data_kernel_);
|
AnfAlgo::UpdateInternalParameterShape(internal_parameters_, data_kernel_);
|
||||||
}
|
}
|
||||||
PostRun(context);
|
PostRun(context);
|
||||||
}
|
}
|
||||||
|
|
|
@ -460,8 +460,8 @@ void KernelActor::PreLaunchKernel(OpContext<DeviceTensor> *) {
|
||||||
void KernelActor::PostLaunchKernel(OpContext<DeviceTensor> *const context) {
|
void KernelActor::PostLaunchKernel(OpContext<DeviceTensor> *const context) {
|
||||||
if (is_dynamic_shape_) {
|
if (is_dynamic_shape_) {
|
||||||
kernel::UpdateNodeShape(kernel_);
|
kernel::UpdateNodeShape(kernel_);
|
||||||
UpdateOutputAddrSize(kernel_info_, kernel_);
|
AnfAlgo::UpdateOutputAddrSize(kernel_info_, kernel_);
|
||||||
UpdateInternalParameterShape(internal_parameters_, kernel_);
|
AnfAlgo::UpdateInternalParameterShape(internal_parameters_, kernel_);
|
||||||
}
|
}
|
||||||
|
|
||||||
running_dependent_msg_num_ = SizeToInt(input_datas_num_ + input_controls_num_);
|
running_dependent_msg_num_ = SizeToInt(input_datas_num_ + input_controls_num_);
|
||||||
|
|
|
@ -152,7 +152,7 @@ bool SuperKernelActor::CopyInputData(const OpContext<DeviceTensor> *context) {
|
||||||
MS_LOG(ERROR) << "Copy data failed.";
|
MS_LOG(ERROR) << "Copy data failed.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (HasAbstractRef(input_node) && ref_node_addr_map_.count(input_node) == 0) {
|
if (common::AnfAlgo::HasAbstractRef(input_node) && ref_node_addr_map_.count(input_node) == 0) {
|
||||||
ref_node_addr_map_[input_node] = input_device_tensor;
|
ref_node_addr_map_[input_node] = input_device_tensor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,11 +95,11 @@ bool is_need_copy_device_tensor(const AnfNodePtr &backend_node, size_t index) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (HasAbstractRef(backend_node)) {
|
if (common::AnfAlgo::HasAbstractRef(backend_node)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto kernel_graph = FetchKernelGraph(backend_node);
|
auto kernel_graph = AnfAlgo::FetchKernelGraph(backend_node);
|
||||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||||
if (kernel_graph->IsInRefOutputMap({backend_node, index})) {
|
if (kernel_graph->IsInRefOutputMap({backend_node, index})) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -1268,7 +1268,7 @@ void ControlNodeScheduler::LinkControlArrowForKernelActor(ActorSet *const actor_
|
||||||
} else if (no_input_kernel_actor->type_ == KernelTransformType::kKernelActor) {
|
} else if (no_input_kernel_actor->type_ == KernelTransformType::kKernelActor) {
|
||||||
const auto &kernel_actor = dynamic_cast<KernelActor *>(no_input_kernel_actor.get());
|
const auto &kernel_actor = dynamic_cast<KernelActor *>(no_input_kernel_actor.get());
|
||||||
MS_EXCEPTION_IF_NULL(kernel_actor);
|
MS_EXCEPTION_IF_NULL(kernel_actor);
|
||||||
kernel_graph = FetchKernelGraph(kernel_actor->kernel());
|
kernel_graph = AnfAlgo::FetchKernelGraph(kernel_actor->kernel());
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1289,7 +1289,7 @@ void ControlNodeScheduler::LinkControlArrowForKernelActor(ActorSet *const actor_
|
||||||
for (auto &kernel_actor : actor_set->kernel_actors_) {
|
for (auto &kernel_actor : actor_set->kernel_actors_) {
|
||||||
MS_EXCEPTION_IF_NULL(kernel_actor);
|
MS_EXCEPTION_IF_NULL(kernel_actor);
|
||||||
if ((kernel_actor->output_data_arrows_.size() == 0) && (kernel_actor->output_control_arrows_.size() == 0)) {
|
if ((kernel_actor->output_data_arrows_.size() == 0) && (kernel_actor->output_control_arrows_.size() == 0)) {
|
||||||
auto kernel_graph = FetchKernelGraph(kernel_actor->kernel());
|
auto kernel_graph = AnfAlgo::FetchKernelGraph(kernel_actor->kernel());
|
||||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||||
auto to_actor_name = parser->FetchGroupNameByKernelGraph(kernel_graph) + kExitActorNameSuffix;
|
auto to_actor_name = parser->FetchGroupNameByKernelGraph(kernel_graph) + kExitActorNameSuffix;
|
||||||
auto to_actor = FetchActor(to_actor_name);
|
auto to_actor = FetchActor(to_actor_name);
|
||||||
|
@ -1665,7 +1665,7 @@ void ControlNodeScheduler::AddFormalParameterDeviceTensor(ControlActor *const fr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!HasAbstractRef(input_node)) {
|
if (!common::AnfAlgo::HasAbstractRef(input_node)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -168,7 +168,7 @@ void ClearNodeInfo(const KernelGraphPtr &graph) {
|
||||||
if (parameter->used_graph_count() != 0) {
|
if (parameter->used_graph_count() != 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto front_input_node = FetchFrontNodeByBackendNode(input_node, graph);
|
auto front_input_node = AnfAlgo::FetchFrontNodeByBackendNode(input_node, *graph);
|
||||||
DeviceTensorStore::GetInstance().Remove(front_input_node.get());
|
DeviceTensorStore::GetInstance().Remove(front_input_node.get());
|
||||||
size_t output_num = common::AnfAlgo::GetOutputTensorNum(input_node);
|
size_t output_num = common::AnfAlgo::GetOutputTensorNum(input_node);
|
||||||
for (size_t index = 0; index < output_num; ++index) {
|
for (size_t index = 0; index < output_num; ++index) {
|
||||||
|
@ -180,7 +180,7 @@ void ClearNodeInfo(const KernelGraphPtr &graph) {
|
||||||
|
|
||||||
// Clear input value node device tensor and device tensor store.
|
// Clear input value node device tensor and device tensor store.
|
||||||
for (const auto &value_node : graph->graph_value_nodes()) {
|
for (const auto &value_node : graph->graph_value_nodes()) {
|
||||||
auto front_value_node = FetchFrontNodeByBackendNode(value_node, graph);
|
auto front_value_node = AnfAlgo::FetchFrontNodeByBackendNode(value_node, *graph);
|
||||||
DeviceTensorStore::GetInstance().Remove(front_value_node.get());
|
DeviceTensorStore::GetInstance().Remove(front_value_node.get());
|
||||||
if (AnfAlgo::OutputAddrExist(value_node, 0)) {
|
if (AnfAlgo::OutputAddrExist(value_node, 0)) {
|
||||||
AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get());
|
AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get());
|
||||||
|
@ -845,7 +845,7 @@ std::vector<DataSourceActorPtr> GraphScheduler::BuildDataSourceActor(const Graph
|
||||||
(void)data_source_actors.emplace_back(host_queue_ds_actor);
|
(void)data_source_actors.emplace_back(host_queue_ds_actor);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto &front_node = FetchFrontNodeByBackendNode(input_node, graph);
|
const auto &front_node = AnfAlgo::FetchFrontNodeByBackendNode(input_node, *graph);
|
||||||
// In the scenario where multiple backend nodes correspond to the same front node, only the first backend node
|
// In the scenario where multiple backend nodes correspond to the same front node, only the first backend node
|
||||||
// is saved in the host queue data source actor.
|
// is saved in the host queue data source actor.
|
||||||
if (front_node_position_temp_map.count(front_node) > 0) {
|
if (front_node_position_temp_map.count(front_node) > 0) {
|
||||||
|
@ -1176,7 +1176,7 @@ void GraphScheduler::LinkDataArrowInSinkMode(const KernelGraphPtr &graph, const
|
||||||
(void)std::for_each(auto_monad_kernels.begin(), auto_monad_kernels.end(), [&](const CNodePtr &kernel) {
|
(void)std::for_each(auto_monad_kernels.begin(), auto_monad_kernels.end(), [&](const CNodePtr &kernel) {
|
||||||
for (size_t i = 0; i < common::AnfAlgo::GetInputTensorNum(kernel); ++i) {
|
for (size_t i = 0; i < common::AnfAlgo::GetInputTensorNum(kernel); ++i) {
|
||||||
KernelWithIndex from_kernel_with_output_idx = common::AnfAlgo::GetPrevNodeOutput(kernel, i, false);
|
KernelWithIndex from_kernel_with_output_idx = common::AnfAlgo::GetPrevNodeOutput(kernel, i, false);
|
||||||
auto front_node = FetchFrontNodeByBackendNode(from_kernel_with_output_idx.first, graph);
|
auto front_node = AnfAlgo::FetchFrontNodeByBackendNode(from_kernel_with_output_idx.first, *graph);
|
||||||
if (IsPersistentDeviceTensor(front_node)) {
|
if (IsPersistentDeviceTensor(front_node)) {
|
||||||
(void)to_actor->auto_monad_device_tensor_stores_.insert(front_node);
|
(void)to_actor->auto_monad_device_tensor_stores_.insert(front_node);
|
||||||
}
|
}
|
||||||
|
@ -1268,7 +1268,7 @@ void GraphScheduler::LinkDataArrowForDeviceTensorStore(AbstractActor *const, Abs
|
||||||
|
|
||||||
auto from_kernel = from_kernel_with_output_idx.first;
|
auto from_kernel = from_kernel_with_output_idx.first;
|
||||||
MS_EXCEPTION_IF_NULL(from_kernel);
|
MS_EXCEPTION_IF_NULL(from_kernel);
|
||||||
auto device_tensor_store_key = FetchFrontNodeByBackendNode(from_kernel, graph);
|
auto device_tensor_store_key = AnfAlgo::FetchFrontNodeByBackendNode(from_kernel, *graph);
|
||||||
(void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, device_tensor_store_key);
|
(void)to_actor->device_tensor_store_keys_.emplace_back(to_kernel_with_input_idx.second, device_tensor_store_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1772,7 +1772,7 @@ void GraphScheduler::LinkControlArrowForCustomActor(ActorSet *const actor_set,
|
||||||
for (auto iter = dynamic_shape_depends.begin(); iter != dynamic_shape_depends.end(); ++iter) {
|
for (auto iter = dynamic_shape_depends.begin(); iter != dynamic_shape_depends.end(); ++iter) {
|
||||||
auto input_node = common::AnfAlgo::GetInputNode(base_node, *iter);
|
auto input_node = common::AnfAlgo::GetInputNode(base_node, *iter);
|
||||||
KernelWithIndex from_kernel_with_output_idx = common::AnfAlgo::VisitKernelWithReturnType(input_node, 0, false);
|
KernelWithIndex from_kernel_with_output_idx = common::AnfAlgo::VisitKernelWithReturnType(input_node, 0, false);
|
||||||
auto graph = FetchKernelGraph(from_kernel_with_output_idx.first);
|
auto graph = AnfAlgo::FetchKernelGraph(from_kernel_with_output_idx.first);
|
||||||
auto kernel_type =
|
auto kernel_type =
|
||||||
FetchKernelTransformType(from_kernel_with_output_idx.first, graph, graph_compiler_info.origin_parameters_order_,
|
FetchKernelTransformType(from_kernel_with_output_idx.first, graph, graph_compiler_info.origin_parameters_order_,
|
||||||
graph_compiler_info.strategy_);
|
graph_compiler_info.strategy_);
|
||||||
|
@ -1946,7 +1946,7 @@ void GraphScheduler::LinkOutputResultArrowForOutputActor(OutputActor *to_actor,
|
||||||
}
|
}
|
||||||
for (const auto &output_with_index : unique_outputs) {
|
for (const auto &output_with_index : unique_outputs) {
|
||||||
MS_EXCEPTION_IF_NULL(output_with_index.first);
|
MS_EXCEPTION_IF_NULL(output_with_index.first);
|
||||||
auto origin_output_with_index = FetchFrontNodeWithIndexByGraphOutput(output_with_index, graph);
|
auto origin_output_with_index = graph->GetFrontNodeWithIndexByGraphOutput(output_with_index);
|
||||||
const auto &iter = graph_compiler_info.origin_outputs_order_.find(origin_output_with_index);
|
const auto &iter = graph_compiler_info.origin_outputs_order_.find(origin_output_with_index);
|
||||||
if (iter == graph_compiler_info.origin_outputs_order_.end()) {
|
if (iter == graph_compiler_info.origin_outputs_order_.end()) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -2194,7 +2194,7 @@ void GraphScheduler::PersistDeviceTensor(const GraphCompilerInfo &graph_compiler
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto device_tensor = AnfAlgo::GetMutableOutputAddr(value_node, 0, false);
|
auto device_tensor = AnfAlgo::GetMutableOutputAddr(value_node, 0, false);
|
||||||
const auto &front_node = FetchFrontNodeByBackendNode(value_node, graph);
|
const auto &front_node = AnfAlgo::FetchFrontNodeByBackendNode(value_node, *graph);
|
||||||
device_tensor->SetNodeIndex(value_node, 0);
|
device_tensor->SetNodeIndex(value_node, 0);
|
||||||
AddDeviceTensorStore(front_node.get(), device_tensor);
|
AddDeviceTensorStore(front_node.get(), device_tensor);
|
||||||
}
|
}
|
||||||
|
@ -2206,7 +2206,7 @@ void GraphScheduler::PersistDeviceTensor(const GraphCompilerInfo &graph_compiler
|
||||||
auto front_output_with_index = graph->GetFrontNodeByInternalParameter(input_node);
|
auto front_output_with_index = graph->GetFrontNodeByInternalParameter(input_node);
|
||||||
front_node = front_output_with_index.first;
|
front_node = front_output_with_index.first;
|
||||||
} else if (IsPersistentDeviceTensor(input_node)) {
|
} else if (IsPersistentDeviceTensor(input_node)) {
|
||||||
front_node = FetchFrontNodeByBackendNode(input_node, graph);
|
front_node = AnfAlgo::FetchFrontNodeByBackendNode(input_node, *graph);
|
||||||
}
|
}
|
||||||
// The front node may be value node in the heterogeneous scene, needs to handle.
|
// The front node may be value node in the heterogeneous scene, needs to handle.
|
||||||
if ((front_node == nullptr) ||
|
if ((front_node == nullptr) ||
|
||||||
|
@ -2345,7 +2345,7 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil
|
||||||
if (!AnfAlgo::OutputAddrExist(value_node, 0)) {
|
if (!AnfAlgo::OutputAddrExist(value_node, 0)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto &front_node = FetchFrontNodeByBackendNode(value_node, graph);
|
const auto &front_node = AnfAlgo::FetchFrontNodeByBackendNode(value_node, *graph);
|
||||||
MS_EXCEPTION_IF_NULL(front_node);
|
MS_EXCEPTION_IF_NULL(front_node);
|
||||||
const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get());
|
const auto device_tensors = DeviceTensorStore::GetInstance().Fetch(front_node.get());
|
||||||
ofs << "\t\tdevice tensor key:" << front_node->fullname_with_scope() << "\tvalue size:" << device_tensors.size()
|
ofs << "\t\tdevice tensor key:" << front_node->fullname_with_scope() << "\tvalue size:" << device_tensors.size()
|
||||||
|
@ -2365,7 +2365,7 @@ void GraphScheduler::DumpDeviceTensorStore(const GraphCompilerInfo &graph_compil
|
||||||
if (!IsPersistentDeviceTensor(input_node)) {
|
if (!IsPersistentDeviceTensor(input_node)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto &front_node = FetchFrontNodeByBackendNode(input_node, graph);
|
const auto &front_node = AnfAlgo::FetchFrontNodeByBackendNode(input_node, *graph);
|
||||||
const auto &root_parameters = graph_compiler_info.origin_parameters_order_;
|
const auto &root_parameters = graph_compiler_info.origin_parameters_order_;
|
||||||
if (front_node == nullptr ||
|
if (front_node == nullptr ||
|
||||||
find(root_parameters.begin(), root_parameters.end(), front_node) == root_parameters.end()) {
|
find(root_parameters.begin(), root_parameters.end(), front_node) == root_parameters.end()) {
|
||||||
|
|
Loading…
Reference in New Issue