From 5bec9e95cdc9ff30ff883f9c8b7d52906e78c314 Mon Sep 17 00:00:00 2001 From: liuchongming Date: Mon, 26 Feb 2024 19:56:19 +0800 Subject: [PATCH] Supply partial shape to full shape in dynamic shape. --- .jenkins/check/config/whitelizard.txt | 3 +- .../parallel/graph_util/graph_utils.cc | 48 +++++++++++++------ .../parallel/ops_info/operator_info.cc | 3 +- .../ccsrc/frontend/parallel/step_parallel.cc | 5 ++ 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index 6489572ca3b..c7356cf7142 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -445,4 +445,5 @@ mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/ mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/selection_ops_proto.cc:ge::IMPLEMT_COMMON_INFERFUNC mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/nn_pooling_ops_proto.cc:ge::IMPLEMT_COMMON_INFERFUNC mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/nn_norm_ops_proto.cc:ge::CUST_IMPLEMT_INFERFUNC -mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/cast.cc:aicpu::CastKernel::Compute \ No newline at end of file +mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/cast.cc:aicpu::CastKernel::Compute +mindspore/mindspore/ccsrc/frontend/parallel/step_parallel.cc:mindspore::parallel::StepParallel \ No newline at end of file diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc b/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc index 12ee28114fb..9b70dbc249f 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc @@ -54,14 +54,7 @@ CNodePtr CreateShape(const AnfNodePtr &pre_cnode, const FuncGraphPtr 
&func_graph return shape_cnode; } -bool IsTargetOp(const CNodePtr &cnode, const std::string &target) { - RETURN_IF_FALSE(cnode != nullptr); - auto value_node = cnode->input(0)->cast(); - RETURN_IF_FALSE(value_node != nullptr); - auto prim = value_node->value()->cast(); - RETURN_IF_FALSE(prim != nullptr); - return prim->name() == target; -} +inline bool IsTargetOp(const CNodePtr &cnode, const std::string &target) { return GetPrimName(cnode) == target; } bool IsTupleGetItem(const CNodePtr &cnode) { return IsTargetOp(cnode, TUPLE_GETITEM_OP); } @@ -497,6 +490,10 @@ Status ConvertReshapeInputs(const OperatorParams &params, } Shape shape_vec = GetValue(param.first.second); MS_LOG(INFO) << "shape param = " << shape_vec; + size_t dynamic_axis_cnt = std::count(shape_vec.begin(), shape_vec.end(), -1); + if (shape_vec.size() > 1 && dynamic_axis_cnt >= SIZE_TWO) { + MS_LOG(EXCEPTION) << "The shape of Reshape op has more than one -1, cannot be supported for now."; + } if (!WhetherMatchingIsNeededForReshape(shape_vec, tensor_redistribution_from_cnode)) { MS_LOG(INFO) << "No need to matching for " << shape_vec; AnfNodePtr val = NewValueNode(param.first.second); @@ -880,23 +877,39 @@ Status UpdatePartialShape(const CNodePtr &cnode) { return Status::SUCCESS; } +CNodePtr FindPreviousCareNode(const CNodePtr &current, int32_t depth = 0) { + if (depth == MAX_RECURSIVE_DEPTH) { + return nullptr; + } + auto prev = current->input(1); + // If prev is parameter maybe problem here. + auto cnode = prev->cast(); + MS_EXCEPTION_IF_CHECK_FAIL(cnode != nullptr, "Input of node is parameter is not supported."); + if (!IsParallelCareNode(cnode) && (IsTargetOp(cnode, "Cast") || IsTupleGetItem(cnode))) { + return FindPreviousCareNode(cnode, depth + 1); + } + return cnode; +} + TensorInfo GetDistributeOperatorFromCNode(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); CNodePtr target_cnode = cnode; - if (IsTupleGetItem(cnode)) { + if (!IsParallelCareNode(cnode)) { // keep search the previous node.
- auto prev_node = FindPreviousNodeAndSkipTupleGetItem(cnode); - target_cnode = prev_node.first; + target_cnode = FindPreviousCareNode(cnode); } + MS_EXCEPTION_IF_NULL(target_cnode); if (!target_cnode->has_user_data()) { - MS_LOG(EXCEPTION) << target_cnode->fullname_with_scope() << " has no operator info."; + MS_LOG(EXCEPTION) << "Found " << cnode->fullname_with_scope() << " previous node is " + << target_cnode->fullname_with_scope() << " and it has no operator info."; } OperatorInfoPtr distribute_operator = GetDistributeOperator(target_cnode); MS_EXCEPTION_IF_NULL(distribute_operator); std::vector root_tensor_info = distribute_operator->outputs_tensor_info(); if (root_tensor_info.size() != 1) { - MS_LOG(EXCEPTION) << "Outputs number cannot be larger than 1."; + MS_LOG(INFO) << "Outputs number cannot be larger than 1, but " << target_cnode->fullname_with_scope() << " has " + << root_tensor_info.size() << " outputs."; } return root_tensor_info[0]; } @@ -921,7 +934,12 @@ Status UpdateShapeNode(const CNodePtr &cnode, const FuncGraphPtr &func_graph) { if (shape_user == nullptr) { continue; } - MS_EXCEPTION_IF_CHECK_FAIL(IsTupleGetItem(shape_user), "Only support TupleGetItem here."); + if (IsReshapeOp(shape_user)) { + MS_LOG(WARNING) << "Won't supply shape for Reshape."; + continue; + } + MS_EXCEPTION_IF_CHECK_FAIL(IsTupleGetItem(shape_user), + "Only support TupleGetItem here, but got " + GetPrimName(shape_user)); int64_t index = GetTupleGetItemIndex(shape_user); if (LongToSize(index) >= tensor_map.GetDimSize()) { MS_LOG(ERROR) << "Index cannot be larger than tensor_map size."; @@ -944,7 +962,7 @@ Status UpdateShapeNode(const CNodePtr &cnode, const FuncGraphPtr &func_graph) { next_node.second, // shape_user_user[input_index] = scalar_mul_op shape_user, // insert scalar_mul_op between previous and current shape_user_user->func_graph(), // current func_graph - "instance_name", "", nullptr); + "update_partial_shape", "", nullptr); } } return Status::SUCCESS; diff --git 
a/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc index fa369cdf91c..909e01e5ad8 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc @@ -683,8 +683,7 @@ Operator CreateScalarMulOp(int64_t scalar) { OperatorAttrs operator_attrs; OperatorParams operator_param; constexpr size_t parameter_pos = 2; - mindspore::tensor::TensorPtr tensor_ptr = std::make_shared(scalar); - ValuePtr scale_value = MakeValue(tensor_ptr); + ValuePtr scale_value = MakeValue(std::make_shared(scalar)); (void)operator_param.emplace_back(std::make_pair(std::make_pair(Y, scale_value), parameter_pos)); OperatorArgs operator_arg = std::make_pair(operator_attrs, operator_param); diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc index 8915aef67ee..c3a658d25fe 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -3101,6 +3101,11 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) // save strategy as checkpoint for multi-train CheckpointStrategy(all_nodes, root); + if (MergeEntireShapeForDynamic(root) != Status::SUCCESS) { + MS_LOG(ERROR) << "Merge entire shape for dynamic shape failed."; + return false; + } + // ForwardCommunication BackwardCommunication TensorRedistribution ParallelCommunication(root, all_nodes, manager);