diff --git a/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc index 216c3496fa5..325f05f526b 100644 --- a/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc +++ b/mindspore/ccsrc/backend/common/pass/communication_op_fusion.cc @@ -185,6 +185,8 @@ bool CommunicationOpFusion::GetSplitSegments(const CommunicationOpInfo &communic if (parallel_mode == parallel::kDataParallel && op_name_ == kAllReduceOpName) { auto threshold = parallel_context->dp_fusion_threshold_mb(); GetAllReduceSplitSegment(communication_op_info.communication_op_nodes, threshold, segment_index); + MS_LOG(INFO) << "The split threshold for AllReduce is " << threshold << ", the segment num is " + << segment_index->size(); } return CheckSegments(communication_op_node_size, segment_index); } diff --git a/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc index 87854dc868a..a18d6186a24 100644 --- a/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/common/session/anf_runtime_algorithm.cc @@ -1050,7 +1050,7 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::mapset_abstract(eval_result); diff --git a/mindspore/ccsrc/include/common/utils/anfalgo.h b/mindspore/ccsrc/include/common/utils/anfalgo.h index 49c52a2850c..8e867ee0e99 100644 --- a/mindspore/ccsrc/include/common/utils/anfalgo.h +++ b/mindspore/ccsrc/include/common/utils/anfalgo.h @@ -170,7 +170,7 @@ class COMMON_EXPORT AnfAlgo { static bool IsHostKernel(const CNodePtr &node); // return true if use cnode_input's abstract, false if use real_input's abstract static void AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input, - const AnfNodePtr &real_input, size_t index); + const AnfNodePtr &real_input); // Find real input nodes. static void GetAllFatherRealNode(const AnfNodePtr &anf_node, std::vector *result, std::set *visited); diff --git a/mindspore/ccsrc/kernel/kernel.cc b/mindspore/ccsrc/kernel/kernel.cc index 1f72482a37f..9bba03a5c44 100644 --- a/mindspore/ccsrc/kernel/kernel.cc +++ b/mindspore/ccsrc/kernel/kernel.cc @@ -95,7 +95,7 @@ void KernelMod::InferShape() { tuple_elements->set_value(out_tensor); } } - common::AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i); + common::AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input); } auto eval_result = opt::CppInferShape(primitive, args_spec_list); cnode->set_abstract(eval_result); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.h index 89e4e2ea8c4..9614b24cf5f 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/batchmatmul_fusedmuladd_fusion_pass.h @@ -34,11 +34,11 @@ class BatchMatmulFusedMulAddFusionPass : public FusionBasePass { PassSwitchManager::GetInstance().RegistLicPass(name(), OptPassEnum::BatchMatmulFusedMulAddFusionPass); } ~BatchMatmulFusedMulAddFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; private: void MatchBatchMatmulFusedMulAdd(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); - void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/matmul_dropoutdomaskv3_add_fusion_pass.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/matmul_dropoutdomaskv3_add_fusion_pass.h index b08191119a7..3cd40a2a0ea 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/matmul_dropoutdomaskv3_add_fusion_pass.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/buffer_fusion/matmul_dropoutdomaskv3_add_fusion_pass.h @@ -32,10 +32,10 @@ class MatmulDropoutDoMaskV3AddFusionPass : public FusionBasePass { explicit MatmulDropoutDoMaskV3AddFusionPass(FusionIdAllocatorPtr idAllocator) : FusionBasePass("MatmulDropoutDoMaskV3AddFusionPass", idAllocator) {} ~MatmulDropoutDoMaskV3AddFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; private: void MatchMatmulDropoutDoMaskV3Add(const CNodePtr &cnode, FusedNodeRecord *candidate_fusion); - void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h index 3e921487203..8d3dbab401a 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/squared_difference_kernel.h @@ -64,7 +64,7 @@ class SquaredDifferenceOpGpuKernelMod : public NativeGpuKernelMod { InitSizeLists(); return true; } - need_broadcast_ = IsBroadcast(input_shape1, input_shape2); + need_broadcast_ = common::AnfAlgo::IsTensorBroadcast(input_shape1, input_shape2); if (need_broadcast_ && output_shape.size() > MAX_DIMS) { MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of output cannot be greater than " << MAX_DIMS << ", but got " << output_shape.size(); @@ -135,18 +135,6 @@ class SquaredDifferenceOpGpuKernelMod : public NativeGpuKernelMod { } private: - bool IsBroadcast(const std::vector &lhs, const std::vector &rhs) { - if (lhs.size() != rhs.size()) { - return true; - } - for (size_t i = 0; i < lhs.size(); i++) { - if (lhs[i] != rhs[i]) { - return true; - } - } - return false; - } - BroadcastOpType op_type_; bool need_broadcast_; bool is_comp_op_; diff --git a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc index 739ec8c53e9..178def51085 100644 --- a/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/executor/dynamic_kernel.cc @@ -105,7 +105,7 @@ void DynamicKernel::InferShape() { tuple_elements->set_value(out_tensor); } } - common::AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i); + common::AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input); } auto eval_result = opt::CppInferShape(primitive, args_spec_list); cnode->set_abstract(eval_result); diff --git a/mindspore/ccsrc/utils/anfalgo.cc b/mindspore/ccsrc/utils/anfalgo.cc index c3d9dc34605..b32039b5922 100644 --- a/mindspore/ccsrc/utils/anfalgo.cc +++ b/mindspore/ccsrc/utils/anfalgo.cc @@ -1422,7 +1422,7 @@ bool AnfAlgo::IsHostKernel(const CNodePtr &kernel_node) { } void AnfAlgo::AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input, - const AnfNodePtr &real_input, size_t index) { + const AnfNodePtr &real_input) { if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) { // cppcheck-suppress unreadVariable auto lock = AnfUtils::GetAbstractLock(real_input.get());