aclnnMean/aclnnReduceSum adaptation

type: feature
target: all
reason:

------

Signed-off-by: wang_ziqi <wangziqi4@huawei.com>
wang_ziqi 2024-02-26 21:30:01 +08:00 committed by hedongdong
parent 83f322b4ba
commit 260ba5846c
47 changed files with 2314 additions and 212 deletions

View File

@@ -1,6 +1,6 @@
mindspore.Tensor.mean
=====================
.. py:method:: mindspore.Tensor.mean(axis=None, keep_dims=False)
.. py:method:: mindspore.Tensor.mean(axis=None, keep_dims=False, dtype=None)
For details, see :func:`mindspore.ops.mean`.

View File

@@ -1,17 +1,18 @@
mindspore.ops.mean
==================
.. py:function:: mindspore.ops.mean(x, axis=None, keep_dims=False)
.. py:function:: mindspore.ops.mean(input, axis=None, keep_dims=False, dtype=None)
By default, all dimensions of the input are removed and the mean of all elements in `x` is returned. Alternatively, only the specified dimensions in `axis` can be reduced to size 1. `keep_dims` controls whether the output and the input have the same number of dimensions.
By default, all dimensions of the input are removed and the mean of all elements in `input` is returned. Alternatively, only the specified dimensions in `axis` can be reduced to size 1. `keep_dims` controls whether the output and the input have the same number of dimensions.
.. note::
A Tensor-type `axis` is only kept for backward compatibility and is not recommended.
Args:
- **x** (Tensor[Number]) - The input Tensor, whose data type is a number type. The shape is :math:`(N, *)`, where :math:`*` means any number of additional dimensions.
- **axis** (Union[int, tuple(int), list(int), Tensor]) - The dimensions to reduce. Default: ``None``, reduce all dimensions. Only constant values are allowed. Assuming the rank of `x` is r, the value range is [-r, r).
- **input** (Tensor[Number]) - The input Tensor, whose data type is a number type. The shape is :math:`(N, *)`, where :math:`*` means any number of additional dimensions.
- **axis** (Union[int, tuple(int), list(int), Tensor]) - The dimensions to reduce. Default: ``None``, reduce all dimensions. Only constant values are allowed. Assuming the rank of `input` is r, the value range is [-r, r).
- **keep_dims** (bool) - If ``True``, keep the reduced dimensions with length 1. Otherwise, remove the dimensions. Default: ``False``.
- **dtype** (:class:`mindspore.dtype`) - The desired data type of the output Tensor. Default: ``None``.
Returns:
Tensor.
@@ -22,7 +23,7 @@ mindspore.ops.mean
- If `axis` is a 1-D Tensor, e.g. with value [1, 2], and `keep_dims` is ``False``, the shape of the output Tensor is :math:`(x_0, x_3, ..., x_R)`.
Raises:
- **TypeError** - `x` is not a Tensor.
- **TypeError** - `input` is not a Tensor.
- **TypeError** - `axis` is not one of the following types: int, tuple, list, or Tensor.
- **TypeError** - `keep_dims` is not a bool.
- **ValueError** - `axis` is out of range.

View File

@@ -17,9 +17,14 @@
#include "backend/common/expander/fallback/fallback_irbuilder.h"
#include "include/common/utils/utils.h"
#include "utils/shape_utils.h"
#include "ops/op_utils.h"
namespace mindspore {
namespace expander {
namespace {
const std::set<TypeId> kIntegralSet = {kNumberTypeBool, kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeInt16,
                                       kNumberTypeInt32};
} // namespace
REG_FALLBACK_BUILDER("AddExt").SetBody(BODYFUNC(ib) {
auto x = ib->GetInput(kIndex0);
auto y = ib->GetInput(kIndex1);
@@ -35,5 +40,59 @@ REG_FALLBACK_BUILDER("SubExt").SetBody(BODYFUNC(ib) {
auto alpha_tensor = ib->Cast(ib->ScalarToTensor(alpha, x->dtype()), y->dtype());
return {x - y * alpha_tensor};
});
REG_FALLBACK_BUILDER("MeanExt").SetBody(BODYFUNC(ib) {
auto input = ib->GetInput(kIndex0);
auto axis = ib->GetInput(kIndex1);
auto keep_dims = ib->GetInput(kIndex2);
auto dtype = ib->GetInput(kIndex3);
auto dtype_type = dtype->abstract()->BuildType();
MS_EXCEPTION_IF_NULL(dtype_type);
// cppcheck-suppress *
if (!dtype_type->isa<TypeNone>()) {
auto dtype_opt = ops::GetScalarValue<int64_t>(dtype->BuildValue());
MS_CHECK_VALUE(dtype_opt.has_value(), "For 'MeanExt', dtype must have valid value.");
input = ib->Cast(input, TypeIdToType(static_cast<TypeId>(dtype_opt.value())));
}
auto axis_type = axis->abstract()->BuildType();
MS_EXCEPTION_IF_NULL(axis_type);
if (axis_type->isa<TypeNone>()) {
axis = ib->Value<std::vector<int64_t>>({});
}
auto out = ib->Emit("ReduceMean", {input, axis, keep_dims});
return {out};
});
REG_FALLBACK_BUILDER("SumExt").SetBody(BODYFUNC(ib) {
auto input = ib->GetInput(kIndex0);
auto axis = ib->GetInput(kIndex1);
auto keep_dims = ib->GetInput(kIndex2);
auto dtype = ib->GetInput(kIndex3);
auto dtype_type = dtype->abstract()->BuildType();
MS_EXCEPTION_IF_NULL(dtype_type);
if (!dtype_type->isa<TypeNone>()) {
auto dtype_opt = ops::GetScalarValue<int64_t>(dtype->BuildValue());
MS_CHECK_VALUE(dtype_opt.has_value(), "For 'SumExt', dtype must have valid value.");
input = ib->Cast(input, TypeIdToType(static_cast<TypeId>(dtype_opt.value())));
} else {
auto input_type = input->dtype()->type_id();
if (kIntegralSet.find(input_type) != kIntegralSet.end()) {
input = ib->Cast(input, kInt64);
}
}
auto axis_type = axis->abstract()->BuildType();
MS_EXCEPTION_IF_NULL(axis_type);
if (axis_type->isa<TypeNone>()) {
axis = ib->Value<std::vector<int64_t>>({});
}
auto out = ib->Emit("ReduceSum", {input, axis, keep_dims, ib->Value<bool>(false)});
return {out};
});
} // namespace expander
} // namespace mindspore
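
For reference, the two builders above lower the dtype-aware ops onto the existing ReduceMean/ReduceSum primitives: cast when an explicit dtype is given, map a None axis to the empty tuple (reduce everything), and, for SumExt only, promote bool and small integer inputs to int64. A minimal Python sketch of the same decomposition (hypothetical helper names, not part of this commit):

import mindspore as ms
from mindspore import ops

def mean_ext_fallback(x, axis=None, keep_dims=False, dtype=None):
    # Cast first when an explicit dtype is requested, as the builder does.
    if dtype is not None:
        x = ops.cast(x, dtype)
    # A None axis becomes the empty tuple, i.e. reduce over all dimensions.
    axis = () if axis is None else axis
    return ops.ReduceMean(keep_dims)(x, axis)

def sum_ext_fallback(x, axis=None, keep_dims=False, dtype=None):
    if dtype is not None:
        x = ops.cast(x, dtype)
    elif x.dtype in (ms.bool_, ms.uint8, ms.int8, ms.int16, ms.int32):
        # Integral inputs are promoted to int64, mirroring kIntegralSet above.
        x = ops.cast(x, ms.int64)
    axis = () if axis is None else axis
    return ops.ReduceSum(keep_dims)(x, axis)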

View File

@@ -2913,5 +2913,56 @@ REG_BPROP_BUILDER("IRFFT").SetBody(BODYFUNC(ib) {
return {grad_dout, ib->OutZeros(n), ib->OutZeros(dim), ib->OutZeros(norm)};
});
REG_BPROP_BUILDER("MeanExt").SetUnusedInputs({i0, i4}).SetBody(BODYFUNC(ib) {
auto input = ib->GetInput(kIndex0);
auto input_dtype_id = ib->GetDtypeId(input);
if (input_dtype_id == kNumberTypeComplex64 || input_dtype_id == kNumberTypeComplex128) {
MS_EXCEPTION(TypeError) << "For 'MeanExt', gradient not support for complex type currently.";
}
auto axis = ib->GetInput(kIndex1);
auto keep_dims = ib->GetInput(kIndex2);
auto dtype = ib->GetInput(kIndex3);
auto out = ib->GetInput(kIndex4);
auto dout = ib->GetInput(kIndex5);
auto axis_type = axis->abstract()->BuildType();
MS_EXCEPTION_IF_NULL(axis_type);
if (axis_type->isa<TypeNone>()) {
axis = ib->Value<std::vector<int64_t>>({});
}
auto grad = SumGrad(ib, input, axis, dout, GetValue<bool>(keep_dims->BuildValue()));
NodePtr div_shape_node;
if (IsDynamic(ib->GetShape(input)) || IsDynamic(ib->GetShape(out))) {
auto shape_out_sz = ib->DynSize(out, kFloat32);
auto div_shape = ib->DynSize(input, kFloat32) / shape_out_sz;
div_shape_node = ib->Cast(div_shape, ib->GetDtype(grad));
} else {
auto shape_out_sz = ib->GetSize(out);
if (shape_out_sz == 0) {
MS_EXCEPTION(ValueError) << "For 'MeanExt', out shape size can not be 0";
}
auto div_shape = ib->GetSize(input) / shape_out_sz;
div_shape_node = ib->Tensor(div_shape, ib->GetDtype(grad));
}
auto dx = ib->Cast(ib->RealDiv(grad, div_shape_node), ib->GetDtype(input));
return {dx, ib->OutZeros(axis), ib->OutZeros(keep_dims), ib->OutZeros(dtype)};
});
REG_BPROP_BUILDER("SumExt").SetUnusedInputs({i0, i4}).SetBody(BODYFUNC(ib) {
auto input = ib->GetInput(kIndex0);
auto axis = ib->GetInput(kIndex1);
auto keep_dims = ib->GetInput(kIndex2);
auto dtype = ib->GetInput(kIndex3);
auto dout = ib->GetInput(kIndex5);
auto axis_type = axis->abstract()->BuildType();
MS_EXCEPTION_IF_NULL(axis_type);
if (axis_type->isa<TypeNone>()) {
axis = ib->Value<std::vector<int64_t>>({});
}
auto dx = ib->Cast(SumGrad(ib, input, axis, dout, GetValue<bool>(keep_dims->BuildValue())), ib->GetDtype(input));
return {dx, ib->OutZeros(axis), ib->OutZeros(keep_dims), ib->OutZeros(dtype)};
});
REG_BPROP_BUILDERS_END
} // namespace mindspore::expander::bprop
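
As a sanity check on the two backpropagators above, with N the number of input elements folded into each output entry:

dx_{\text{sum}} = \operatorname{broadcast}(dout, \operatorname{shape}(input)), \qquad
dx_{\text{mean}} = \frac{\operatorname{broadcast}(dout, \operatorname{shape}(input))}{N}, \qquad
N = \frac{\operatorname{size}(input)}{\operatorname{size}(out)}

which is why the MeanExt gradient reuses SumGrad and only appends the division (computed via DynSize when shapes are dynamic) and a final cast back to the input dtype.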

View File

@@ -38,6 +38,7 @@ BoolImmPtr ConvertBool(const py::object &obj) {
}
Int64ImmPtr ConvertInt(const py::object &obj) {
// bool is also an instance of py::int_
if (py::isinstance<py::bool_>(obj) || !py::isinstance<py::int_>(obj)) {
return nullptr;
}

View File

@@ -0,0 +1,54 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/kernel/opapi/aclnn/mean_ext_aclnn_kernel.h"
#include <algorithm>
#include <vector>
#include <map>
#include <memory>
#include <functional>
#include "ir/tensor.h"
#include "transform/acl_ir/acl_helper.h"
#include "transform/acl_ir/op_api_convert.h"
#include "abstract/ops/primitive_infer_map.h"
namespace mindspore {
namespace kernel {
void MeanExtAscend::GetWorkSpaceInfo(const std::vector<KernelTensor *> &inputs,
const std::vector<KernelTensor *> &outputs) {
const auto axis_opt = inputs[kIndex1]->GetOptionalValueWithCheck<std::vector<int64_t>>();
if (axis_opt.has_value()) {
axis_ = axis_opt.value();
} else {
axis_ = std::vector<int64_t>{};
}
keep_dims_ = transform::ConvertKernelTensor<bool>(inputs[kIndex2]);
// Infer function has confirmed the actual dtype of output
dtype_ = outputs[kIndex0]->dtype_id();
GetWorkspaceForResize(inputs[kIndex0], axis_, keep_dims_, dtype_, outputs[kIndex0]);
}
bool MeanExtAscend::Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
const std::vector<KernelTensor *> &outputs, void *stream_ptr) {
MS_EXCEPTION_IF_NULL(stream_ptr);
ParseGenExecutor(
GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex0], axis_, keep_dims_, dtype_, outputs[kIndex0]));
RunOp(stream_ptr, workspace);
return true;
}
MS_ACLNN_KERNEL_FACTORY_REG(MeanExt, MeanExtAscend);
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,46 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_MEAN_EXT_ACLNN_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_MEAN_EXT_ACLNN_KERNEL_H_
#include <vector>
#include <utility>
#include "ops/base_operator.h"
#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h"
#include "transform/acl_ir/acl_convert.h"
namespace mindspore {
namespace kernel {
class MeanExtAscend : public AclnnKernelMod {
public:
MeanExtAscend() : AclnnKernelMod(std::move("aclnnMean")) {}
~MeanExtAscend() = default;
bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
const std::vector<KernelTensor *> &outputs, void *stream_ptr) override;
void GetWorkSpaceInfo(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override;
private:
DEFINE_GET_WORKSPACE_FOR_RESIZE()
std::vector<int64_t> axis_{};
bool keep_dims_{false};
TypeId dtype_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_MEAN_EXT_ACLNN_KERNEL_H_

View File

@@ -0,0 +1,54 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/kernel/opapi/aclnn/sum_ext_aclnn_kernel.h"
#include <algorithm>
#include <vector>
#include <map>
#include <memory>
#include <functional>
#include "ir/tensor.h"
#include "transform/acl_ir/acl_helper.h"
#include "transform/acl_ir/op_api_convert.h"
#include "abstract/ops/primitive_infer_map.h"
namespace mindspore {
namespace kernel {
void SumExtAscend::GetWorkSpaceInfo(const std::vector<KernelTensor *> &inputs,
const std::vector<KernelTensor *> &outputs) {
const auto axis_opt = inputs[kIndex1]->GetOptionalValueWithCheck<std::vector<int64_t>>();
if (axis_opt.has_value()) {
axis_ = axis_opt.value();
} else {
axis_ = std::vector<int64_t>{};
}
keep_dims_ = transform::ConvertKernelTensor<bool>(inputs[kIndex2]);
// Infer function has confirmed the actual dtype of output
dtype_ = outputs[kIndex0]->dtype_id();
GetWorkspaceForResize(inputs[kIndex0], axis_, keep_dims_, dtype_, outputs[kIndex0]);
}
bool SumExtAscend::Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
const std::vector<KernelTensor *> &outputs, void *stream_ptr) {
MS_EXCEPTION_IF_NULL(stream_ptr);
ParseGenExecutor(
GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex0], axis_, keep_dims_, dtype_, outputs[kIndex0]));
RunOp(stream_ptr, workspace);
return true;
}
MS_ACLNN_KERNEL_FACTORY_REG(SumExt, SumExtAscend);
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,46 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_SUM_EXT_ACLNN_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_SUM_EXT_ACLNN_KERNEL_H_
#include <vector>
#include <utility>
#include "ops/base_operator.h"
#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h"
#include "transform/acl_ir/acl_convert.h"
namespace mindspore {
namespace kernel {
class SumExtAscend : public AclnnKernelMod {
public:
SumExtAscend() : AclnnKernelMod(std::move("aclnnReduceSum")) {}
~SumExtAscend() = default;
bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
const std::vector<KernelTensor *> &outputs, void *stream_ptr) override;
void GetWorkSpaceInfo(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override;
private:
DEFINE_GET_WORKSPACE_FOR_RESIZE()
std::vector<int64_t> axis_{};
bool keep_dims_{false};
TypeId dtype_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_SUM_EXT_ACLNN_KERNEL_H_

View File

@@ -0,0 +1,59 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/kernel/pyboost/customize/mean_ext.h"
#include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
#include "kernel/pyboost/pyboost_utils.h"
#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h"
#include "runtime/device/device_address_utils.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
tensor::TensorPtr MeanExtAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype) {
OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims, dtype);
std::vector<int64_t> axis_vector{};
if (axis.has_value()) {
axis_vector = ConvertValueTupleToVector<int64_t>(axis.value());
}
const auto keep_dims_imm = GetValue<bool>(keep_dims);
// Infer function has confirmed the actual dtype of output
TypeId out_dtype = op->output_abs()->GetType()->cast<TensorTypePtr>()->element()->type_id();
PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
PyBoostUtils::DispatchRun(
std::make_shared<runtime::PyBoostDeviceTask>([op, input_tensor, axis_vector, keep_dims_imm, out_dtype]() {
auto device_context = op->device_context();
PyBoostUtils::MallocOpInputs(device_context, input_tensor);
PyBoostUtils::MallocOpOutputs(device_context, op->outputs());
MS_LOG(DEBUG) << op->primitive()->name() << " Call start";
LAUNCH_ACLNN(aclnnMean, device_context, op->stream_id(), input_tensor, axis_vector, keep_dims_imm, out_dtype,
op->output(0));
MS_LOG(DEBUG) << op->primitive()->name() << " Launch end";
}));
return op->output(0);
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,35 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_
#include <memory>
#include <vector>
#include "ir/tensor.h"
#include "ir/value.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
tensor::TensorPtr MeanExtAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_

View File

@@ -0,0 +1,58 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/kernel/pyboost/customize/sum_ext.h"
#include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
#include "kernel/pyboost/pyboost_utils.h"
#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h"
#include "runtime/device/device_address_utils.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
tensor::TensorPtr SumExtAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype) {
OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims, dtype);
std::vector<int64_t> axis_vector{};
if (axis.has_value()) {
axis_vector = ConvertValueTupleToVector<int64_t>(axis.value());
}
const auto keep_dims_imm = GetValue<bool>(keep_dims);
// Infer function has confirmed the actual dtype of output
TypeId out_dtype = op->output_abs()->GetType()->cast<TensorTypePtr>()->element()->type_id();
PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
PyBoostUtils::DispatchRun(
std::make_shared<runtime::PyBoostDeviceTask>([op, input_tensor, axis_vector, keep_dims_imm, out_dtype]() {
auto device_context = op->device_context();
PyBoostUtils::MallocOpInputs(device_context, input_tensor);
PyBoostUtils::MallocOpOutputs(device_context, op->outputs());
MS_LOG(DEBUG) << op->primitive()->name() << " Call start";
LAUNCH_ACLNN(aclnnReduceSum, device_context, op->stream_id(), input_tensor, axis_vector, keep_dims_imm, out_dtype,
op->output(0));
MS_LOG(DEBUG) << op->primitive()->name() << " Launch end";
}));
return op->output(0);
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,35 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_
#include <memory>
#include <vector>
#include "ir/tensor.h"
#include "ir/value.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
tensor::TensorPtr SumExtAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_

View File

@@ -17,6 +17,8 @@
#include "plugin/device/ascend/optimizer/mindir/reduce_axis_update.h"
#include <vector>
#include <memory>
#include <set>
#include <string>
#include "mindspore/core/ops/math_ops.h"
#include "include/common/utils/anfalgo.h"
@@ -61,7 +63,8 @@ bool ReduceAxisUpdate::IsReduce(const BaseRef &ref) {
if (IsPrimitive(node, prim::kPrimReduceMin) || IsPrimitive(node, prim::kPrimReduceMax) ||
IsPrimitive(node, prim::kPrimReduceMean) || IsPrimitive(node, prim::kPrimReduceSum) ||
IsPrimitive(node, prim::kPrimReduceProd) || IsPrimitive(node, prim::kPrimReduceAll) ||
IsPrimitive(node, prim::kPrimReduceAny)) {
IsPrimitive(node, prim::kPrimReduceAny) || IsPrimitive(node, prim::kPrimMeanExt) ||
IsPrimitive(node, prim::kPrimSumExt)) {
return true;
}
}
@@ -90,6 +93,15 @@ bool ReduceAxisUpdate::IsAxisEmpty(const ValueNodePtr &axis_node) const {
return false;
}
bool ReduceAxisUpdate::IsAxisNone(const AnfNodePtr &cnode, const ValueNodePtr &axis_node) const {
static std::set<std::string> op_name_support_none = {prim::kPrimMeanExt->name(), prim::kPrimSumExt->name()};
auto cnode_name = common::AnfAlgo::GetCNodeName(cnode);
if (op_name_support_none.find(cnode_name) == op_name_support_none.end()) {
return false;
}
return axis_node->value()->isa<None>();
}
bool ReduceAxisUpdate::IsInputScalar(const AnfNodePtr &x_node) const {
MS_EXCEPTION_IF_NULL(x_node);
auto x_shape_ptr = x_node->Shape();
@@ -146,6 +158,12 @@ bool ReduceAxisUpdate::CheckMatchedDAG(const PatternMap &, const FuncGraphPtr &g
return false;
}
// If input is dynamic rank, expand axis will get wrong result.
if (IsDynamicRank(common::AnfAlgo::GetPrevNodeOutputInferShape(node, 0))) {
MS_LOG(INFO) << "The input rank of dimension of " << node->DebugString() << " is unknown.";
return false;
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
@@ -165,8 +183,10 @@ bool ReduceAxisUpdate::CheckMatchedDAG(const PatternMap &, const FuncGraphPtr &g
MS_LOG(INFO) << "Axis input is " << input_axis->DebugString() << ".";
auto axis_value_node = input_axis->cast<ValueNodePtr>();
if (axis_value_node == nullptr || (!IsAxisEmpty(axis_value_node) && !IsInputScalar(input_x))) {
MS_LOG(INFO) << "Axis input of node " << node->fullname_with_scope() << " is not value node or axis is not empty.";
if (axis_value_node == nullptr ||
(!(IsAxisEmpty(axis_value_node) || IsAxisNone(cnode, axis_value_node)) && !IsInputScalar(input_x))) {
MS_LOG(INFO) << "Axis input of node " << node->fullname_with_scope()
<< " is not value node or axis is not empty or none.";
return false;
} else {
MS_LOG(INFO) << "Axis of node " << node->fullname_with_scope() << " is empty.";

View File

@@ -33,6 +33,7 @@ class ReduceAxisUpdate : public PatternToPatternPass {
private:
bool IsAxisEmpty(const ValueNodePtr &axis_node) const;
bool IsAxisNone(const AnfNodePtr &cnode, const ValueNodePtr &axis_node) const;
bool IsInputScalar(const AnfNodePtr &x_node) const;
static bool IsReduce(const BaseRef &ref);
};

View File

@@ -0,0 +1,102 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/pyboost/customize/mean_ext.h"
#include "plugin/device/cpu/kernel/pyboost/auto_generate/cast.h"
#include "plugin/device/cpu/kernel/pyboost/auto_generate/mean_ext.h"
#include "kernel/pyboost/pyboost_utils.h"
#include "ops/auto_generate/gen_ops_primitive.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
namespace {
void MeanExtCPUCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor, const ValuePtr &axis,
const BoolImmPtr &keep_dims, const std::vector<AbstractBasePtr> &input_abs) {
PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
PyBoostUtils::DispatchRun(
std::make_shared<runtime::PyBoostDeviceTask>([op, input_tensor, axis, keep_dims, input_abs]() {
MS_LOG(DEBUG) << "For 'MeanExt', the cpu task 'ReduceMean' start";
auto device_context = op->device_context();
const auto &outputs = op->outputs();
const auto primitive = std::make_shared<Primitive>(prim::kPrimReduceMean->name());
MS_EXCEPTION_IF_NULL(primitive);
PyBoostUtils::MallocOpInputs(device_context, input_tensor);
PyBoostUtils::MallocOpOutputs(device_context, outputs);
const auto &input_address_info =
PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs, input_tensor, axis, keep_dims);
const auto &output_address_info =
PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs);
PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info);
MS_LOG(DEBUG) << "For 'MeanExt', the cpu task 'ReduceMean' end";
}));
}
} // namespace
void MeanExtCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype) {
OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims, dtype);
ValuePtr act_axis;
if (axis.has_value()) {
act_axis = axis.value();
} else {
act_axis = MakeValue<std::vector<int64_t>>({});
}
// Infer function has confirmed the actual dtype of output
TypeId out_dtype = op->output_abs()->GetType()->cast<TensorTypePtr>()->element()->type_id();
TensorPtr act_tensor = input_tensor;
// Call Cast before Launch ReduceMean
if (input_tensor->data_type() != out_dtype) {
MS_LOG(DEBUG) << "Call Cast cpu kernel, src dtype: " << TypeIdToString(input_tensor->data_type())
<< ", dst dtype: " << TypeIdToString(out_dtype);
act_tensor =
PyBoostUtils::CastTensor(input_tensor, out_dtype, op->device_context()->device_context_key_.device_name_);
}
// Set new input abstract for ReduceMean
std::vector<AbstractBasePtr> new_input_abs{act_tensor->ToAbstract(), act_axis->ToAbstract(), keep_dims->ToAbstract()};
// Check if dtype is matched on ReduceMean kernel
auto kernel_attr_pair =
PyBoostUtils::SelectKernel(new_input_abs, op->output_abs(), op->device_context(), prim::kPrimReduceMean->name());
if (kernel_attr_pair.first) {
MeanExtCPUCall(op, act_tensor, act_axis, keep_dims, new_input_abs);
} else {
auto &select_kernel = kernel_attr_pair.second;
auto &device_name = op->device_context()->device_context_key_.device_name_;
const auto &real_input_tensor =
PyBoostUtils::CastTensor(act_tensor, select_kernel.input_type()[0].dtype, device_name);
const auto &mean_ext_op = CREATE_PYBOOST_OP(MeanExt, device_name);
mean_ext_op->set_primitive(prim::kPrimMeanExt);
const auto out_tensor = mean_ext_op->Call(real_input_tensor, axis, keep_dims, std::nullopt);
const auto &real_output_tensor = PyBoostUtils::CastTensor(out_tensor, out_dtype, device_name);
op->set_outputs({real_output_tensor});
}
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,36 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
void MeanExtCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_

View File

@@ -0,0 +1,106 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/pyboost/customize/sum_ext.h"
#include "plugin/device/cpu/kernel/pyboost/auto_generate/cast.h"
#include "plugin/device/cpu/kernel/pyboost/auto_generate/sum_ext.h"
#include "kernel/pyboost/pyboost_utils.h"
#include "ops/auto_generate/gen_ops_primitive.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
namespace {
void SumExtCPUCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor, const ValuePtr &axis,
const BoolImmPtr &keep_dims, const BoolImmPtr &skip_mode,
const std::vector<AbstractBasePtr> &input_abs) {
PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
PyBoostUtils::DispatchRun(
std::make_shared<runtime::PyBoostDeviceTask>([op, input_tensor, axis, keep_dims, skip_mode, input_abs]() {
MS_LOG(DEBUG) << "For 'SumExt', the cpu task 'ReduceSum' start";
auto device_context = op->device_context();
const auto &outputs = op->outputs();
const auto primitive = std::make_shared<Primitive>(prim::kPrimReduceSum->name());
MS_EXCEPTION_IF_NULL(primitive);
PyBoostUtils::MallocOpInputs(device_context, input_tensor);
PyBoostUtils::MallocOpOutputs(device_context, outputs);
const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs,
input_tensor, axis, keep_dims, skip_mode);
const auto &output_address_info =
PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs);
PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info);
MS_LOG(DEBUG) << "For 'SumExt', the cpu task 'ReduceSum' end";
}));
}
} // namespace
void SumExtCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype) {
OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims, dtype);
// If axis is None, convert it to an empty tuple
ValuePtr act_axis;
if (axis.has_value()) {
act_axis = axis.value();
} else {
act_axis = MakeValue<std::vector<int64_t>>({});
}
// Infer function has confirmed the actual dtype of output
TypeId out_dtype = op->output_abs()->GetType()->cast<TensorTypePtr>()->element()->type_id();
TensorPtr act_tensor = input_tensor;
// Call Cast before Launch ReduceSum
if (input_tensor->data_type() != out_dtype) {
MS_LOG(DEBUG) << "Call Cast cpu kernel, src dtype: " << TypeIdToString(input_tensor->data_type())
<< ", dst dtype: " << TypeIdToString(out_dtype);
act_tensor =
PyBoostUtils::CastTensor(input_tensor, out_dtype, op->device_context()->device_context_key_.device_name_);
}
const auto skip_mode = std::make_shared<BoolImm>(false);
// Set new input abstract for ReduceSum
std::vector<AbstractBasePtr> new_input_abs{act_tensor->ToAbstract(), act_axis->ToAbstract(), keep_dims->ToAbstract(),
skip_mode->ToAbstract()};
// Check if dtype is matched on ReduceSum kernel
auto kernel_attr_pair =
PyBoostUtils::SelectKernel(new_input_abs, op->output_abs(), op->device_context(), prim::kPrimReduceSum->name());
if (kernel_attr_pair.first) {
SumExtCPUCall(op, act_tensor, act_axis, keep_dims, std::make_shared<BoolImm>(false), new_input_abs);
} else {
auto &select_kernel = kernel_attr_pair.second;
auto &device_name = op->device_context()->device_context_key_.device_name_;
const auto &real_input_tensor =
PyBoostUtils::CastTensor(act_tensor, select_kernel.input_type()[0].dtype, device_name);
const auto &sum_ext_op = CREATE_PYBOOST_OP(SumExt, device_name);
sum_ext_op->set_primitive(prim::kPrimSumExt);
const auto out_tensor = sum_ext_op->Call(real_input_tensor, axis, keep_dims, std::nullopt);
const auto &real_output_tensor = PyBoostUtils::CastTensor(out_tensor, out_dtype, device_name);
op->set_outputs({real_output_tensor});
}
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,36 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
void SumExtCPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_

View File

@@ -0,0 +1,90 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/pyboost/customize/mean_ext.h"
#include "plugin/device/gpu/kernel/pyboost/auto_generate/cast.h"
#include "runtime/hardware/device_context_manager.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "ops/auto_generate/gen_ops_primitive.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
namespace {
void MeanExtGPUCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor, const ValuePtr &axis,
const BoolImmPtr &keep_dims) {
PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([op, input_tensor, axis, keep_dims]() {
MS_LOG(DEBUG) << "For 'MeanExt', the gpu task 'ReduceMean' start";
auto device_context = op->device_context();
const auto &outputs = op->outputs();
const auto primitive = std::make_shared<Primitive>(prim::kPrimReduceMean->name());
MS_EXCEPTION_IF_NULL(primitive);
PyBoostUtils::MallocOpInputs(device_context, input_tensor);
PyBoostUtils::MallocOpOutputs(device_context, outputs);
// Set new Abstract for ReduceMean
std::vector<AbstractBasePtr> input_abs{input_tensor->ToAbstract(), axis->ToAbstract(), keep_dims->ToAbstract()};
const auto &input_address_info =
PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs, input_tensor, axis, keep_dims);
const auto &output_address_info =
PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs);
auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id());
PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, stream);
static auto sync = MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE);
if (sync && !device_context->device_res_manager_->SyncAllStreams()) {
MS_LOG(EXCEPTION) << "SyncStream failed for op " << primitive->name();
}
MS_LOG(DEBUG) << "For 'MeanExt', the gpu task 'ReduceMean' end";
}));
}
} // namespace
void MeanExtGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype) {
OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims, dtype);
// If axis is None, convert it to an empty tuple
ValuePtr act_axis;
if (axis.has_value()) {
act_axis = axis.value();
} else {
act_axis = MakeValue<std::vector<int64_t>>({});
}
// Infer function has confirmed the actual dtype of output
TypeId out_dtype = op->output_abs()->GetType()->cast<TensorTypePtr>()->element()->type_id();
TensorPtr act_tensor = input_tensor;
// Call Cast before Launch ReduceMean
if (input_tensor->data_type() != out_dtype) {
MS_LOG(DEBUG) << "Call Cast gpu kernel, src dtype: " << TypeIdToString(input_tensor->data_type())
<< ", dst dtype: " << TypeIdToString(out_dtype);
const auto &cast_op = CREATE_PYBOOST_OP(Cast, op->device_context()->device_context_key_.device_name_);
cast_op->set_primitive(prim::kPrimCast);
act_tensor = cast_op->Call(input_tensor, std::make_shared<Int64Imm>(out_dtype));
}
MeanExtGPUCall(op, act_tensor, act_axis, keep_dims);
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,36 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
void MeanExtGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_MEAN_EXT_H_

View File

@@ -0,0 +1,92 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/kernel/pyboost/customize/sum_ext.h"
#include "plugin/device/gpu/kernel/pyboost/auto_generate/cast.h"
#include "runtime/hardware/device_context_manager.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "ops/auto_generate/gen_ops_primitive.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
namespace {
void SumExtGPUCall(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor, const ValuePtr &axis,
const BoolImmPtr &keep_dims, const BoolImmPtr &skip_mode) {
PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);
PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
PyBoostUtils::DispatchRun(
std::make_shared<runtime::PyBoostDeviceTask>([op, input_tensor, axis, keep_dims, skip_mode]() {
MS_LOG(DEBUG) << "For 'SumExt', the gpu task 'ReduceSum' start";
auto device_context = op->device_context();
const auto &outputs = op->outputs();
const auto primitive = std::make_shared<Primitive>(prim::kPrimReduceSum->name());
MS_EXCEPTION_IF_NULL(primitive);
PyBoostUtils::MallocOpInputs(device_context, input_tensor);
PyBoostUtils::MallocOpOutputs(device_context, outputs);
// Set new Abstract for ReduceSum
std::vector<AbstractBasePtr> input_abs{input_tensor->ToAbstract(), axis->ToAbstract(), keep_dims->ToAbstract(),
skip_mode->ToAbstract()};
const auto &input_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs,
input_tensor, axis, keep_dims, skip_mode);
const auto &output_address_info =
PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs);
auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id());
PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, stream);
static auto sync = MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE);
if (sync && !device_context->device_res_manager_->SyncAllStreams()) {
MS_LOG(EXCEPTION) << "SyncStream failed for op " << primitive->name();
}
MS_LOG(DEBUG) << "For 'SumExt', the gpu task 'ReduceSum' end";
}));
}
} // namespace
void SumExtGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype) {
OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims, dtype);
// If axis is None, convert it to an empty tuple
ValuePtr act_axis;
if (axis.has_value()) {
act_axis = axis.value();
} else {
act_axis = MakeValue<std::vector<int64_t>>({});
}
// Infer function has confirmed the actual dtype of output
TypeId out_dtype = op->output_abs()->GetType()->cast<TensorTypePtr>()->element()->type_id();
TensorPtr act_tensor = input_tensor;
// Call Cast before Launch ReduceSum
if (input_tensor->data_type() != out_dtype) {
MS_LOG(DEBUG) << "Call Cast gpu kernel, src dtype: " << TypeIdToString(input_tensor->data_type())
<< ", dst dtype: " << TypeIdToString(out_dtype);
const auto &cast_op = CREATE_PYBOOST_OP(Cast, op->device_context()->device_context_key_.device_name_);
cast_op->set_primitive(prim::kPrimCast);
act_tensor = cast_op->Call(input_tensor, std::make_shared<Int64Imm>(out_dtype));
}
SumExtGPUCall(op, act_tensor, act_axis, keep_dims, std::make_shared<BoolImm>(false));
}
} // namespace pyboost
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,36 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_
#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_
#include <vector>
#include <memory>
#include "ir/tensor.h"
#include "ir/value.h"
#include "runtime/hardware/device_context_manager.h"
#include "kernel/pyboost/op_runner.h"
namespace mindspore {
namespace kernel {
namespace pyboost {
void SumExtGPUCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
const std::optional<ValueTuplePtr> &axis, const BoolImmPtr &keep_dims,
const std::optional<Int64ImmPtr> &dtype);
} // namespace pyboost
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_SUM_EXT_H_

View File

@@ -0,0 +1,77 @@
mean_ext:
description: |
By default, reduces all dimensions of a tensor by averaging all elements in those dimensions.
It can also reduce a dimension of `input` along the specified `axis`. `keep_dims`
determines whether the dimensions of the output and input are the same.
Args:
input (Tensor[Number]): The input tensor. The dtype of the tensor to be reduced is number.
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
axis (Union[int, tuple(int), list(int)]): The dimensions to reduce. Default: ``None`` , reduce all dimensions.
Only constant value is allowed. Assume the rank of `input` is r, and the value range is [-r,r).
keep_dims (bool): If ``True``, keep these reduced dimensions and the length is 1.
If ``False``, don't keep these dimensions. Default: ``False`` .
dtype (:class:`mindspore.dtype`): The desired data type of returned Tensor. Default: ``None``.
Returns:
Tensor, has the same data type as input tensor.
- If `axis` is None, and `keep_dims` is False,
the output is a 0-D tensor representing the mean of all elements in the input tensor.
- If `axis` is int, set as 1, and `keep_dims` is False,
the shape of output is :math:`(x_0, x_2, ..., x_R)`.
- If `axis` is tuple(int), set as (1, 2), and `keep_dims` is ``False`` ,
the shape of output is :math:`(x_0, x_3, ..., x_R)`.
Raises:
TypeError: If `input` is not a Tensor.
TypeError: If `axis` is not one of the following: int, tuple or list.
TypeError: If `keep_dims` is not a bool.
ValueError: If `axis` is out of range.
Supported Platforms:
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32))
>>> output = ops.mean(x, 1, keep_dims=True)
>>> result = output.shape
>>> print(result)
(3, 1, 5, 6)
>>> # case 1: Reduces a dimension by averaging all elements in the dimension.
>>> x = Tensor(np.array([[[2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]],
... [[4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]],
... [[6, 6, 6, 6, 6, 6], [8, 8, 8, 8, 8, 8], [10, 10, 10, 10, 10, 10]]]),
... mindspore.float32)
>>> output = ops.mean(x)
>>> print(output)
5.0
>>> print(output.shape)
()
>>> # case 2: Reduces a dimension along the axis 0
>>> output = ops.mean(x, 0, True)
>>> print(output)
[[[4. 4. 4. 4. 4. 4.]
[5. 5. 5. 5. 5. 5.]
[6. 6. 6. 6. 6. 6.]]]
>>> # case 3: Reduces a dimension along the axis 1
>>> output = ops.mean(x, 1, True)
>>> print(output)
[[[2. 2. 2. 2. 2. 2.]]
[[5. 5. 5. 5. 5. 5.]]
[[8. 8. 8. 8. 8. 8.]]]
>>> # case 4: Reduces a dimension along the axis 2
>>> output = ops.mean(x, 2, True)
>>> print(output)
[[[ 2.]
[ 2.]
[ 2.]]
[[ 4.]
[ 5.]
[ 6.]]
[[ 6.]
[ 8.]
[10.]]]
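A brief extra case sketching the new `dtype` argument (a hypothetical illustration assuming the cast semantics documented above; the values are unchanged, only the output dtype differs):
>>> # case 5: request a specific output dtype
>>> output = ops.mean(x, 2, True, dtype=mindspore.float64)
>>> print(output.dtype)
Float64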

View File

@@ -0,0 +1,54 @@
sum_ext:
description: |
Calculate the sum of Tensor elements over a given dim.
Args:
input (Tensor): The input tensor.
dim (Union[None, int, tuple(int), list(int)]): Dimensions along which a sum is performed.
If ``None``, sum all the elements of the input tensor.
If `dim` is a tuple or list of ints, a sum is performed on all the dimensions specified in the tuple.
Must be in the range :math:`[-input.ndim, input.ndim)` . Default: ``None``.
keepdim (bool): Whether the output tensor has dim retained or not.
If ``True``, keep these reduced dimensions and the length is 1.
If ``False``, don't keep these dimensions. Default: ``False``.
dtype (:class:`mindspore.dtype`): The desired data type of returned Tensor. Default: ``None``.
Returns:
A Tensor, sum of elements over a given dim in `input`.
Raises:
TypeError: If `input` is not a Tensor.
TypeError: If `dim` is not an int, tuple(int), list(int) or None.
ValueError: If `dim` is not in the range :math:`[-input.ndim, input.ndim)` .
TypeError: If `keepdim` is not a bool.
Supported Platforms:
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> from mindspore import dtype as mstype
>>> x = Tensor(np.array([[[1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2], [3, 3, 3, 3, 3, 3]],
... [[4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]],
... [[7, 7, 7, 7, 7, 7], [8, 8, 8, 8, 8, 8], [9, 9, 9, 9, 9, 9]]]), mstype.float32)
>>> out = ops.sum(x)
>>> print(out)
270.0
>>> out = ops.sum(x, dim=2)
>>> print(out)
[[ 6. 12. 18.]
[24. 30. 36.]
[42. 48. 54.]]
>>> out = ops.sum(x, dim=2, keepdim=True)
>>> print(out)
[[[ 6.]
[12.]
[18.]]
[[24.]
[30.]
[36.]]
[[42.]
[48.]
[54.]]]
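A brief extra case sketching the `dtype` argument (a hypothetical illustration assuming the cast behavior documented above):
>>> out = ops.sum(x, dim=2, keepdim=True, dtype=mstype.float64)
>>> print(out.dtype)
Float64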

View File

@@ -0,0 +1,26 @@
#operator mean_ext
mean_ext:
args:
input:
dtype: tensor
axis:
dtype: tuple[int]
type_cast: int, list[int], tensor
default: None
keep_dims:
dtype: bool
default: False
dtype:
dtype: TypeId
arg_handler: dtype_to_type_id
default: None
returns:
output:
dtype: tensor
dispatch:
enable: True
Ascend: MeanExtAscend
CPU: MeanExtCPU
GPU: MeanExtGPU
function:
name: mean

View File

@@ -0,0 +1,24 @@
#operator sum_ext
sum_ext:
args:
input:
dtype: tensor
dim:
dtype: tuple[int]
type_cast: int, list[int]
default: None
keepdim:
dtype: bool
default: False
dtype:
dtype: TypeId
arg_handler: dtype_to_type_id
default: None
returns:
output:
dtype: tensor
dispatch:
enable: True
Ascend: SumExtAscend
CPU: SumExtCPU
GPU: SumExtGPU

View File

@@ -0,0 +1,32 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ops/ops_frontend_func_impl.h"
namespace mindspore::ops {
namespace {
constexpr auto kMeanExt = "MeanExt";
} // namespace
class MeanExtFrontendFuncImpl : public OpFrontendFuncImpl {
public:
ValuePtr InferValue(const PrimitivePtr &, const std::vector<AbstractBasePtr> &input_args) const override {
return InferValueCallback::GetInstance().CallPyInferValue(kMeanExt, input_args);
}
};
REGISTER_PRIMITIVE_FUNCTION_FRONTEND_FUNC_IMPL(kMeanExt, MeanExtFrontendFuncImpl);
} // namespace mindspore::ops

View File

@@ -0,0 +1,32 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ops/ops_frontend_func_impl.h"
namespace mindspore::ops {
namespace {
constexpr auto kSumExt = "SumExt";
} // namespace
class SumExtFrontendFuncImpl : public OpFrontendFuncImpl {
public:
ValuePtr InferValue(const PrimitivePtr &, const std::vector<AbstractBasePtr> &input_args) const override {
return InferValueCallback::GetInstance().CallPyInferValue(kSumExt, input_args);
}
};
REGISTER_PRIMITIVE_FUNCTION_FRONTEND_FUNC_IMPL(kSumExt, SumExtFrontendFuncImpl);
} // namespace mindspore::ops

View File

@@ -1,30 +0,0 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ops/ops_func_impl/mean.h"
#include "ops/op_utils.h"
namespace mindspore {
namespace ops {
BaseShapePtr MeanFuncImpl::InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const {
return ReduceExtInferShape(primitive, input_args);
}
TypePtr MeanFuncImpl::InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const {
return ReduceExtInferType(primitive, input_args);
}
} // namespace ops
} // namespace mindspore

View File

@@ -0,0 +1,38 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ops/ops_func_impl/mean_ext.h"
#include <memory>
#include "ops/ops_func_impl/reduce_arithmetic.h"
#include "ops/op_utils.h"
namespace mindspore {
namespace ops {
BaseShapePtr MeanExtFuncImpl::InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const {
return ReduceExtandInferShape(primitive, input_args);
}
TypePtr MeanExtFuncImpl::InferType(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const {
if (input_args[kIndex3]->GetType()->isa<TypeNone>()) {
return input_args[kIndex0]->GetType()->Clone();
}
auto dtype_opt = GetScalarValue<int64_t>(input_args[kIndex3]->GetValue());
MS_CHECK_VALUE(dtype_opt.has_value(), primitive->name() + " error: dtype input should have a valid value.");
return std::make_shared<TensorType>(TypeIdToType(static_cast<TypeId>(dtype_opt.value())));
}
} // namespace ops
} // namespace mindspore
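Worth noting against SumExt below: when `dtype` is None, MeanExt's output type simply mirrors the input type, while SumExt additionally promotes bool and narrow integer inputs to int64.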

View File

@ -14,17 +14,15 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SUM_H_
#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SUM_H_
#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MEAN_EXT_H_
#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MEAN_EXT_H_
#include <memory>
#include <vector>
#include "mindapi/base/types.h"
#include "ops/ops_func_impl/op_func_impl.h"
namespace mindspore {
namespace ops {
class MIND_API SumFuncImpl : public OpFuncImpl {
class MIND_API MeanExtFuncImpl : public OpFuncImpl {
public:
BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
@ -32,4 +30,4 @@ class MIND_API SumFuncImpl : public OpFuncImpl {
} // namespace ops
} // namespace mindspore
#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SUM_H_
#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MEAN_EXT_H_

View File

@ -86,5 +86,68 @@ BaseShapePtr ReduceInferShape(const PrimitivePtr &primitive, const std::vector<A
}
return std::make_shared<abstract::Shape>(out_shape);
}
BaseShapePtr ReduceExtandInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
auto keep_dims_value = input_args[kInputIndex2]->GetValue();
auto keep_dims_opt = GetScalarValue<bool>(keep_dims_value);
if (MS_UNLIKELY(!keep_dims_opt.has_value())) {
return std::make_shared<abstract::Shape>(ShapeVector({abstract::Shape::kShapeRankAny}));
}
auto keep_dims = keep_dims_opt.value();
auto x_shape = input_args[kInputIndex0]->GetShape()->GetShapeVector();
// If axis is None
if (input_args[kInputIndex1]->GetType()->isa<TypeNone>()) {
return keep_dims
? std::make_shared<abstract::Shape>(IsDynamicRank(x_shape) ? x_shape : ShapeVector(x_shape.size(), 1))
: std::make_shared<abstract::Shape>(ShapeVector({}));
}
auto axis_array_opt = GetArrayValue<int64_t>(input_args[kInputIndex1]);
if (axis_array_opt.has_value()) {
// If axis is empty tuple and keep_dims is False, return a zero-dimensional Tensor
if (axis_array_opt->size() == 0 && !keep_dims) {
return std::make_shared<abstract::Shape>(ShapeVector({}));
}
}
if (IsDynamicRank(x_shape)) {
return std::make_shared<abstract::Shape>(x_shape);
}
if (!axis_array_opt.has_value()) {
// axis is dynamic.
return keep_dims ? std::make_shared<abstract::Shape>(ShapeVector(x_shape.size(), -1))
: std::make_shared<abstract::Shape>(ShapeVector({abstract::Shape::kShapeRankAny}));
}
auto x_shape_size = x_shape.size();
auto axis_array = axis_array_opt.value();
// All values of the axis are known.
if (!axis_array.HasUnknownValue()) {
std::vector<int64_t> axis_vec = axis_array.ToVector();
std::vector<int64_t> real_axis_vec;
(void)std::transform(
axis_vec.begin(), axis_vec.end(), std::back_inserter(real_axis_vec),
[&x_shape_size, &primitive](const int64_t &axis) { return CalRealAixs(axis, x_shape_size, primitive); });
auto out_shape = ReduceFuncCalShapeInferImpl(primitive, x_shape, real_axis_vec, keep_dims);
return std::make_shared<abstract::Shape>(out_shape);
}
// If the axis has unknown value, the reduction position will be any of the input dimensions.
if (!keep_dims) {
MS_CHECK_VALUE(x_shape.size() >= axis_array_opt->size(),
CheckAndConvertUtils::FormatCheckInRangeMsg("axis size", axis_array_opt->size(), kIncludeLeft,
{0, x_shape.size()}, primitive));
return std::make_shared<abstract::Shape>(ShapeVector(x_shape.size() - axis_array_opt->size(), -1));
}
auto out_shape = ShapeVector(x_shape.size(), -1);
for (size_t i = 0; i < axis_array.size(); ++i) {
if (!axis_array.IsValueUnknown(i)) {
auto axis = CalRealAixs(axis_array[i], x_shape_size, primitive);
out_shape[axis] = 1;
}
}
return std::make_shared<abstract::Shape>(out_shape);
}
} // namespace ops
} // namespace mindspore
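The branches above are easier to follow with concrete shapes. Below is a minimal Python mirror of the fully static case, assuming known rank and known axis values (illustrative only; the C++ code additionally handles dynamic ranks, unknown axis values, and range validation):
def reduce_extend_shape(x_shape, axis, keep_dims):
    """Illustrative mirror of ReduceExtandInferShape for static shapes/axes."""
    if axis is None:                       # axis=None reduces every dimension
        return [1] * len(x_shape) if keep_dims else []
    if len(axis) == 0 and not keep_dims:   # empty tuple: zero-dimensional result
        return []
    axis = {a + len(x_shape) if a < 0 else a for a in axis}  # normalize negatives
    if keep_dims:                          # reduced dims are kept with length 1
        return [1 if i in axis else d for i, d in enumerate(x_shape)]
    return [d for i, d in enumerate(x_shape) if i not in axis]
assert reduce_extend_shape([2, 3, 4], (1,), keep_dims=True) == [2, 1, 4]
assert reduce_extend_shape([2, 3, 4], (0, -1), keep_dims=False) == [3]
assert reduce_extend_shape([2, 3, 4], None, keep_dims=False) == []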

View File

@ -24,6 +24,8 @@
namespace mindspore {
namespace ops {
BaseShapePtr ReduceInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args);
BaseShapePtr ReduceExtandInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args);
int64_t CalRealAixs(const int64_t &axis, const size_t &x_shape_size, const PrimitivePtr &primitive);
} // namespace ops
} // namespace mindspore
#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_REDUCE_REDUCE_ARITHMETIC_H_

View File

@ -1,30 +0,0 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ops/ops_func_impl/sum.h"
#include "ops/op_utils.h"
namespace mindspore {
namespace ops {
BaseShapePtr SumFuncImpl::InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const {
return ReduceExtInferShape(primitive, input_args);
}
TypePtr SumFuncImpl::InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const {
return ReduceExtInferType(primitive, input_args);
}
} // namespace ops
} // namespace mindspore

View File

@ -0,0 +1,48 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ops/ops_func_impl/sum_ext.h"
#include <set>
#include "ops/ops_func_impl/reduce_arithmetic.h"
#include "ops/op_utils.h"
namespace mindspore {
namespace ops {
BaseShapePtr SumExtFuncImpl::InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const {
return ReduceExtandInferShape(primitive, input_args);
}
TypePtr SumExtFuncImpl::InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const {
TypeId type_id;
if (input_args[kIndex3]->GetType()->isa<TypeNone>()) {
auto tensor_type = input_args[kIndex0]->GetType()->cast<TensorTypePtr>();
MS_EXCEPTION_IF_NULL(tensor_type);
type_id = tensor_type->element()->type_id();
static std::set<TypeId> intergral_set = {kNumberTypeBool, kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeInt16,
kNumberTypeInt32};
if (intergral_set.find(type_id) != intergral_set.end()) {
type_id = kNumberTypeInt64;
}
} else {
auto dtype_opt = GetScalarValue<int64_t>(input_args[kIndex3]->GetValue());
MS_CHECK_VALUE(dtype_opt.has_value(), primitive->name() + " error: dtype input should have a valid value.");
type_id = static_cast<TypeId>(dtype_opt.value());
}
return std::make_shared<TensorType>(TypeIdToType(type_id));
}
} // namespace ops
} // namespace mindspore
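A hedged Python distillation of the dtype rule implemented here, matching the test_sum_default_dtype / test_sum_valid_dtype cases in the new st test below: an explicit dtype always wins; otherwise bool and narrow integer inputs are accumulated as int64.
INTEGRAL = {"bool", "uint8", "int8", "int16", "int32"}
def sum_ext_dtype(input_dtype, dtype=None):
    if dtype is not None:         # an explicit dtype always wins
        return dtype
    if input_dtype in INTEGRAL:   # integral inputs promote to int64
        return "int64"
    return input_dtype            # float/complex inputs keep their dtype
assert sum_ext_dtype("int8") == "int64"
assert sum_ext_dtype("float16") == "float16"
assert sum_ext_dtype("int32", "float32") == "float32"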

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MEAN_H_
#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MEAN_H_
#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SUM_EXT_H_
#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SUM_EXT_H_
#include <memory>
#include <vector>
@ -24,7 +24,7 @@
namespace mindspore {
namespace ops {
class MIND_API MeanFuncImpl : public OpFuncImpl {
class MIND_API SumExtFuncImpl : public OpFuncImpl {
public:
BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override;
@ -32,4 +32,4 @@ class MIND_API MeanFuncImpl : public OpFuncImpl {
} // namespace ops
} // namespace mindspore
#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MEAN_H_
#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SUM_EXT_H_

View File

@ -70,7 +70,7 @@ itemsize_map = {mstype.bool_: 1, mstype.int8: 1, mstype.uint8: 1,
nan_tensor = Tensor(float('nan'), dtype=mstype.float32)
def mean(x, axis=None, keep_dims=False):
def mean(x, axis=None, keep_dims=False, dtype=None):
"""
Reduces a dimension of a tensor by averaging all elements in the dimension.
@ -78,6 +78,7 @@ def mean(x, axis=None, keep_dims=False):
axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction,
when axis is None or empty tuple, reduce all dimensions. Default: None.
keep_dims (bool): Whether to keep the reduced dimensions. Default: False.
dtype (:class:`mindspore.dtype`): The desired data type of returned Tensor. Default: ``None``.
Returns:
Tensor, has the same data type as input tensor.
@ -93,7 +94,7 @@ def mean(x, axis=None, keep_dims=False):
>>> print(output)
2.0
"""
return F.mean(x, axis, keep_dims)
return F.mean(x, axis, keep_dims, dtype)
def ndimension(x):

View File

@ -1848,11 +1848,11 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('log2')(self)
def mean(self, axis=None, keep_dims=False):
def mean(self, axis=None, keep_dims=False, dtype=None):
"""
For details, please refer to :func:`mindspore.ops.mean`.
"""
return tensor_operator_registry.get('mean')(self, axis, keep_dims)
return tensor_operator_registry.get('mean')(self, axis, keep_dims, dtype)
def amin(self, axis=None, keepdims=False, *, initial=None, where=None):
"""
@ -3355,14 +3355,9 @@ class Tensor(Tensor_, metaclass=_TensorMeta):
>>> print(input_x.sum(axis=1))
[10. 35.]
"""
if initial is not None and not isinstance(initial, (int, float, bool)):
raise TypeError(f"For Tensor.sum, initial must be int, float or bool, but got {type(initial)}.")
res = tensor_operator_registry.get("sum")(self, axis, keepdims)
if initial is not None:
res += initial
if dtype is not None:
res = res.astype(dtype)
return res
if initial is None:
return tensor_operator_registry.get("sum")(self, axis, keepdims, dtype=dtype)
return tensor_operator_registry.get("sum")(self, axis, keepdims, dtype=dtype) + initial
def sum_to_size(self, *size):
r"""

View File

@ -41,7 +41,7 @@ from mindspore.ops.auto_generate import (minimum, maximum, mul, sin, sinc, sinh,
matrix_exp, sqrt, rsqrt, square, trace, nextafter, abs, acos, acosh, angle,
asin, asinh, atan, atan2, atanh, ceil, equal, erf, erfc, erfinv, exp, expm1,
floor, floor_divide, floor_mod, gcd, greater, greater_equal, less, less_equal,
log, log1p, neg, not_equal, pow, round, isfinite, argmax)
log, log1p, neg, not_equal, pow, round, isfinite, argmax, mean, sum_ext)
from mindspore.nn import layer
from mindspore._checkparam import check_is_number
from mindspore import _checkparam as validator
@ -6636,94 +6636,6 @@ def amax(input, axis=None, keepdims=False, *, initial=None, where=None):
return _get_cache_prim(P.ReduceMax)(keepdims)(input, axis)
def mean(x, axis=None, keep_dims=False):
r"""
Reduces all dimensions of a tensor by averaging all elements in each dimension, by default.
It can also reduce a dimension of `input` along the specified `axis`. `keep_dims`
determines whether the dimensions of the output and input are the same.
Note:
The `axis` with tensor type is only used for compatibility with older versions and is not recommended.
Args:
x (Tensor[Number]): The input tensor. The dtype of the tensor to be reduced is number.
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
axis (Union[int, tuple(int), list(int), Tensor]): The dimensions to reduce. Default: ``None`` ,
reduce all dimensions. Only constant value is allowed. Assume the rank of `input` is r,
and the value range is [-r,r).
keep_dims (bool): If ``True`` , keep these reduced dimensions and the length is 1.
If ``False`` , don't keep these dimensions. Default: ``False`` .
Returns:
Tensor, has the same data type as input tensor.
- If `axis` is ``None`` , and `keep_dims` is ``False`` ,
the output is a 0-D tensor representing the mean of all elements in the input tensor.
- If `axis` is int, set as 1, and `keep_dims` is ``False`` ,
the shape of output is :math:`(x_0, x_2, ..., x_R)`.
- If `axis` is tuple(int), set as (1, 2), and `keep_dims` is ``False`` ,
the shape of output is :math:`(x_0, x_3, ..., x_R)`.
- If `axis` is 1-D Tensor, set as [1, 2], and `keep_dims` is ``False`` ,
the shape of output is :math:`(x_0, x_3, ..., x_R)`.
Raises:
TypeError: If `x` is not a Tensor.
TypeError: If `axis` is not one of the following: int, tuple, list or Tensor.
TypeError: If `keep_dims` is not a bool.
ValueError: If `axis` is out of range.
Supported Platforms:
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> import mindspore
>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32))
>>> output = ops.mean(x, 1, keep_dims=True)
>>> result = output.shape
>>> print(result)
(3, 1, 5, 6)
>>> # case 1: Reduces a dimension by averaging all elements in the dimension.
>>> x = Tensor(np.array([[[2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]],
... [[4, 4, 4, 4, 4, 4], [5, 5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]],
... [[6, 6, 6, 6, 6, 6], [8, 8, 8, 8, 8, 8], [10, 10, 10, 10, 10, 10]]]),
... mindspore.float32)
>>> output = ops.mean(x)
>>> print(output)
5.0
>>> print(output.shape)
()
>>> # case 2: Reduces a dimension along the axis 0
>>> output = ops.mean(x, 0, True)
>>> print(output)
[[[4. 4. 4. 4. 4. 4.]
[5. 5. 5. 5. 5. 5.]
[6. 6. 6. 6. 6. 6.]]]
>>> # case 3: Reduces a dimension along the axis 1
>>> output = ops.mean(x, 1, True)
>>> print(output)
[[[2. 2. 2. 2. 2. 2.]]
[[5. 5. 5. 5. 5. 5.]]
[[8. 8. 8. 8. 8. 8.]]]
>>> # case 4: Reduces a dimension along the axis 2
>>> output = ops.mean(x, 2, True)
>>> print(output)
[[[ 2.]
[ 2.]
[ 2.]]
[[ 4.]
[ 5.]
[ 6.]]
[[ 6.]
[ 8.]
[10.]]]
"""
if axis is None:
axis = ()
return _get_cache_prim(P.ReduceMean)(keep_dims)(x, axis)
def prod(input, axis=None, keep_dims=False):
r"""
Reduces a dimension of a tensor by multiplying all elements in the dimension, by default. And also can
@ -10055,23 +9967,7 @@ def sum(input, dim=None, keepdim=False, *, dtype=None):
[48.]
[54.]]]
"""
if not isinstance(input, Tensor):
raise TypeError(f"For 'sum', 'input' must be Tensor, but got{type(input)}")
if dim is not None and not isinstance(dim, (int, tuple, list)):
raise TypeError(f"For 'sum', 'dim' must be int, tuple(int), list(int) or None, but got {type(dim)}")
if not isinstance(keepdim, bool):
raise TypeError(f"For 'sum', 'keepdim' must be bool, but got {type(keepdim)}")
if input.dtype == mstype.bool_:
input = input.astype(mstype.int64)
if dtype is not None:
input = input.astype(dtype)
reduce_sum = _get_cache_prim(P.ReduceSum)(keep_dims=keepdim)
if dim is not None:
out = reduce_sum(input, dim)
else:
out = reduce_sum(input)
return out
return sum_ext(input, dim, keepdim, dtype)
def tanhshrink(input):

View File

@ -3003,7 +3003,7 @@ def bidense(input1, input2, weight, bias=None):
output = output.transpose(2, 0, 1) * input2
output = output.sum(2).swapaxes(0, 1)
if bias is not None:
output = bias_add_(output, bias)
output = bias_add_(output.astype(bias.dtype), bias)
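# hedged note: the astype above keeps BiasAdd's inputs dtype-consistent now that
# sum() may promote the reduced dtype (e.g. integral inputs to int64).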
if len(input1_shape) != 2:
output_shape = input1_shape[:-1] + (-1,)
output = output.reshape(output_shape)

View File

@ -1248,6 +1248,35 @@ def _infer_value_for_Reduce(input_x, axis, keep_dims, prim_name):
return value
def _infer_value_for_ReduceExtand(input_x, axis, keep_dims, dtype, prim_name):
"""Infer value for Common ReduceExtand op."""
value = None
if input_x is not None:
prim_map = {
'MeanExt': np.mean,
'SumExt': np.sum,
}
np_reduce_extand_func = prim_map.get(prim_name, None)
if np_reduce_extand_func is not None:
value = input_x.asnumpy()
if isinstance(axis, int):
pass
elif axis:
axis = tuple(set(axis))
else:
axis = tuple(range(len(value.shape)))
if dtype is not None:
np_dtype = mstype.dtype_to_nptype(typing.type_id_to_type(dtype))
value = np_reduce_extand_func(value, axis, dtype=np_dtype, keepdims=keep_dims)
else:
value = np_reduce_extand_func(value, axis, keepdims=keep_dims)
value = np.array(value)
value = Tensor(value)
return value
def infer_value_for_Cast(x, dst_type_enum):
"""Infer value for Cast op."""
if x is None:
@ -1304,6 +1333,16 @@ def infer_value_for_ReduceAny(input_x, axis, keep_dims):
return _infer_value_for_Reduce(input_x, axis, keep_dims, 'ReduceAny')
def infer_value_for_MeanExt(input_x, axis, keep_dims, dtype):
"""Infer value for MeanExt op."""
return _infer_value_for_ReduceExtand(input_x, axis, keep_dims, dtype, 'MeanExt')
def infer_value_for_SumExt(input_x, axis, keep_dims, dtype):
"""Infer value for SumExt op."""
return _infer_value_for_ReduceExtand(input_x, axis, keep_dims, dtype, 'SumExt')
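A hedged illustration of the folding these hooks perform (numpy does the actual math, as in _infer_value_for_ReduceExtand above):
import numpy as np
value = np.array([[1., 2.], [3., 4.]], dtype=np.float32)
assert np.mean(value, axis=(0,), keepdims=False).tolist() == [2.0, 3.0]  # MeanExt
assert np.sum(value, axis=(0,), keepdims=False).tolist() == [4.0, 6.0]   # SumExt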
def infer_value_for_Diag(input_x):
"""Infer value for Diag op."""
if input_x is None:

View File

@ -187,3 +187,22 @@ def test_fallback_sum_with_x_unsupported_operand_type_error_2():
with pytest.raises(TypeError) as ex:
foo()
assert "unsupported operand type" in str(ex.value)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fallback_sum_with_x_tensor_n_default_2():
"""
Feature: JIT Fallback
Description: Test sum() in graph mode with input x tensor and input n default.
Expectation: No exception.
"""
@jit
def foo():
x = sum(Tensor([[1, 1], [2, 2]]))
return x
out = foo()
assert np.allclose(out.asnumpy(), np.array([3, 3]))

View File

@ -0,0 +1,486 @@
# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import pytest
import numpy as np
import mindspore as ms
import mindspore.ops.function as F
import mindspore.common.dtype as mstype
from mindspore import ops
from mindspore import Tensor
from mindspore.ops.function import mean, prod
from tests.st.utils import test_utils
from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP
def generate_random_input(shape, dtype):
return np.random.randn(*shape).astype(dtype)
def generate_expect_forward_output(name, x, axis=None, keep_dims=False, dtype=None):
if name == "mean":
return np.mean(x, axis=axis, dtype=dtype, keepdims=keep_dims)
if name == "prod":
return np.prod(x, axis=axis, dtype=dtype, keepdims=keep_dims)
if name == "sum":
return np.sum(x, axis=axis, dtype=dtype, keepdims=keep_dims)
return None
def mean_func(x, axis=None, keep_dims=False, dtype=None):
return mean(x, axis, keep_dims, dtype)
def sum_func(x, axis=None, keep_dims=False, dtype=None):
return F.sum(x, axis, keep_dims, dtype=dtype)
def prod_func(x, axis=None, keep_dims=False, dtype=None):
return prod(x, axis, keep_dims, dtype)
@test_utils.run_with_cell
def mean_forward_func(x, axis=None, keep_dims=False, dtype=None):
return mean_func(x, axis, keep_dims, dtype)
@test_utils.run_with_cell
def sum_forward_func(x, axis=None, keep_dims=False, dtype=None):
return sum_func(x, axis, keep_dims, dtype=dtype)
@test_utils.run_with_cell
def prod_forward_func(x, axis=None, keep_dims=False, dtype=None):
return prod(x, axis, keep_dims, dtype)
@test_utils.run_with_cell
def mean_backward_func(x, axis=None, keep_dims=False, dtype=None):
return ops.grad(mean_forward_func, (0))(x, axis, keep_dims, dtype)
@test_utils.run_with_cell
def sum_backward_func(x, axis=None, keep_dims=False, dtype=None):
return ops.grad(sum_forward_func, (0))(x, axis, keep_dims, dtype)
@test_utils.run_with_cell
def prod_backward_func(x, axis=None, keep_dims=False, dtype=None):
return ops.grad(prod_forward_func, (0))(x, axis, keep_dims, dtype)
@pytest.mark.level0
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('keep_dims', [False, True])
@pytest.mark.parametrize('in_dtype', [mstype.float16])
@pytest.mark.parametrize('out_dtype', [mstype.float32])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_mean_normal(keep_dims, in_dtype, out_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function mean forward and backward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
axis = (0, -1)
x = generate_random_input((2, 3, 4, 5), mstype.dtype_to_nptype(in_dtype))
output = mean_forward_func(ms.Tensor(x), axis, keep_dims, out_dtype)
expect = generate_expect_forward_output("mean", x, axis, keep_dims, mstype.dtype_to_nptype(out_dtype))
np.testing.assert_equal(output.dtype, out_dtype)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3)
axis = (0, -1)
x = np.arange(2 * 3 * 4).reshape(2, 3, 4).astype(mstype.dtype_to_nptype(in_dtype))
grads = mean_backward_func(ms.Tensor(x), axis, False, out_dtype)
expect = np.full((2, 3, 4), 1 / (2 * 4), mstype.dtype_to_nptype(in_dtype))
np.testing.assert_equal(grads.dtype, in_dtype)
np.testing.assert_allclose(grads.asnumpy(), expect, rtol=1e-3)
@pytest.mark.level0
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_mean_default(context_mode):
"""
Feature: pyboost function.
Description: test function mean forward and backward on ascend with default args.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4, 5), np.float32)
output = mean_forward_func(ms.Tensor(x))
expect = generate_expect_forward_output("mean", x)
np.testing.assert_equal(output.dtype, mstype.float32)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3)
x1 = np.arange(2 * 3 * 4).reshape(2, 3, 4).astype(np.float32)
grads = mean_backward_func(ms.Tensor(x1))
expect = np.full((2, 3, 4), 1 / (2 * 3 * 4), np.float32)
np.testing.assert_equal(grads.dtype, mstype.float32)
np.testing.assert_allclose(grads.asnumpy(), expect, rtol=1e-3)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_mean_dynamic(context_mode):
"""
Feature: pyboost function.
Description: test function mean with dynamic shape and rank.
Expectation: expect correct result.
"""
input1 = Tensor(generate_random_input((2, 3, 4, 5), np.float32))
axis1 = (0, -1)
input2 = Tensor(generate_random_input((3, 3, 4, 4), np.float32))
axis2 = (0, -1)
TEST_OP(mean_func, [[input1, axis1], [input2, axis2]], mode=context_mode, grad=True)
input3 = Tensor(generate_random_input((3, 4, 5), np.float16))
axis3 = ()
keep_dims3 = False
dtype3 = mstype.float32
input4 = Tensor(generate_random_input((3, 4), np.float16))
axis4 = ()
keep_dims4 = False
dtype4 = mstype.float32
TEST_OP(mean_func, [[input3, axis3, keep_dims3, dtype3], [input4, axis4, keep_dims4, dtype4]],
nontensor_dynamic_type='None', mode=context_mode, grad=True, test_resize=False)
input5 = Tensor(generate_random_input((2, 3, 4), np.float32))
input6 = Tensor(generate_random_input((2, 3), np.float32))
TEST_OP(mean_func, [[input5], [input6]], mode=context_mode, grad=True)
@pytest.mark.level0
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('keep_dims', [False, True])
@pytest.mark.parametrize('in_dtype', [mstype.float16])
@pytest.mark.parametrize('out_dtype', [mstype.float32])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_sum_normal(keep_dims, in_dtype, out_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function sum forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
axis = (0, -1)
x = generate_random_input((2, 3, 4, 5), mstype.dtype_to_nptype(in_dtype))
output = sum_forward_func(ms.Tensor(x), axis, keep_dims, out_dtype)
expect = generate_expect_forward_output("sum", x, axis, keep_dims, mstype.dtype_to_nptype(out_dtype))
np.testing.assert_equal(output.dtype, out_dtype)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3)
axis = (0, -1)
x = np.arange(2 * 3 * 4).reshape(2, 3, 4).astype(mstype.dtype_to_nptype(in_dtype))
grads = sum_backward_func(ms.Tensor(x), axis, False, out_dtype)
expect = np.ones((2, 3, 4), mstype.dtype_to_nptype(in_dtype))
np.testing.assert_equal(grads.dtype, in_dtype)
np.testing.assert_allclose(grads.asnumpy(), expect, rtol=1e-3)
@pytest.mark.level0
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_sum_default(context_mode):
"""
Feature: pyboost function.
Description: test function sum on ascend with default args.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4, 5), np.float32)
output = sum_forward_func(ms.Tensor(x))
expect = generate_expect_forward_output("sum", x)
np.testing.assert_equal(output.dtype, mstype.float32)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3)
x1 = np.arange(2 * 3 * 4).reshape(2, 3, 4).astype(np.float32)
grads = sum_backward_func(ms.Tensor(x1))
expect = np.ones((2, 3, 4), np.float32)
np.testing.assert_equal(grads.dtype, mstype.float32)
np.testing.assert_allclose(grads.asnumpy(), expect, rtol=1e-3)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_sum_dynamic(context_mode):
"""
Feature: pyboost function.
Description: test function sum with dynamic shape and rank.
Expectation: expect correct result.
"""
input1 = Tensor(generate_random_input((2, 3, 4, 5), np.float32))
axis1 = (0, -1)
input2 = Tensor(generate_random_input((3, 3, 4, 4), np.float32))
axis2 = (0, -1)
TEST_OP(sum_func, [[input1, axis1], [input2, axis2]], mode=context_mode, grad=True)
input3 = Tensor(generate_random_input((3, 4, 5), np.float32))
axis3 = ()
keep_dims3 = False
dtype3 = mstype.int32
input4 = Tensor(generate_random_input((3, 4), np.float32))
axis4 = ()
keep_dims4 = False
dtype4 = mstype.int64
TEST_OP(sum_func, [[input3, axis3, keep_dims3, dtype3], [input4, axis4, keep_dims4, dtype4]],
nontensor_dynamic_type='None', mode=context_mode, grad=True, test_resize=False)
input5 = Tensor(generate_random_input((2, 3, 4), np.float32))
input6 = Tensor(generate_random_input((2, 3), np.float32))
TEST_OP(sum_func, [[input5], [input6]], mode=context_mode, grad=True)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('axis', [(-1), ()])
@pytest.mark.parametrize('in_dtype', [mstype.float16])
@pytest.mark.parametrize('out_dtype', [mstype.float32, mstype.int8, mstype.uint8, mstype.complex128])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_sum_valid_dtype(axis, in_dtype, out_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function sum forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4), mstype.dtype_to_nptype(in_dtype))
output = sum_forward_func(ms.Tensor(x), axis, False, out_dtype)
np.testing.assert_equal(output.dtype, out_dtype)
x1 = generate_random_input((3, 4, 5), mstype.dtype_to_nptype(in_dtype))
grads = sum_backward_func(ms.Tensor(x1), axis, False, out_dtype)
np.testing.assert_equal(grads.dtype, in_dtype)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('axis', [(-1), ()])
@pytest.mark.parametrize('in_dtype', [mstype.bool_, mstype.int8, mstype.int16, mstype.int32, mstype.uint8])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
def test_sum_default_dtype(axis, in_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function sum forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4), mstype.dtype_to_nptype(in_dtype))
output = sum_forward_func(ms.Tensor(x), axis, False, None)
np.testing.assert_equal(output.dtype, mstype.int64)
x1 = generate_random_input((3, 4, 5), mstype.dtype_to_nptype(in_dtype))
grads = sum_backward_func(ms.Tensor(x1), axis, False, None)
np.testing.assert_equal(grads.dtype, in_dtype)
@pytest.mark.level0
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('keep_dims', [False, True])
@pytest.mark.parametrize('in_dtype', [mstype.float16])
@pytest.mark.parametrize('out_dtype', [mstype.float32])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
@pytest.mark.skip(reason="No support yet")
def test_prod_normal(keep_dims, in_dtype, out_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function prod forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
axis = 0
x = generate_random_input((2, 3, 4, 5), mstype.dtype_to_nptype(in_dtype))
output = prod_forward_func(ms.Tensor(x), axis, keep_dims, out_dtype)
expect = generate_expect_forward_output("prod", x, axis, keep_dims, mstype.dtype_to_nptype(out_dtype))
np.testing.assert_equal(output.dtype, out_dtype)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3)
axis = -1
x = np.arange(2 * 3 * 4).reshape(2, 3, 4).astype(mstype.dtype_to_nptype(in_dtype))
grads = prod_backward_func(ms.Tensor(x), axis, False, out_dtype)
expect = np.array([[[6.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[2.1000e+02, 1.6800e+02, 1.4000e+02, 1.2000e+02],
[9.9000e+02, 8.8000e+02, 7.9200e+02, 7.2000e+02]],
[[2.7300e+03, 2.5200e+03, 2.3400e+03, 2.1840e+03],
[5.8140e+03, 5.4720e+03, 5.1680e+03, 4.8960e+03],
[1.0626e+04, 1.0120e+04, 9.6600e+03, 9.2400e+03]]]).astype(mstype.dtype_to_nptype(in_dtype))
np.testing.assert_equal(grads.dtype, in_dtype)
np.testing.assert_allclose(grads.asnumpy(), expect, rtol=1e-3)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('axis', [0, -1])
@pytest.mark.parametrize('keep_dims', [False, True])
@pytest.mark.parametrize('in_dtype', [mstype.float16])
@pytest.mark.parametrize('out_dtype', [mstype.float32])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
@pytest.mark.skip(reason="No support yet")
def test_prod_normal_1d(axis, keep_dims, in_dtype, out_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function prod forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = np.random.randn(5).astype(mstype.dtype_to_nptype(in_dtype))
output = prod_forward_func(ms.Tensor(x), axis, keep_dims, out_dtype)
expect = generate_expect_forward_output("prod", x, axis, keep_dims, mstype.dtype_to_nptype(out_dtype))
np.testing.assert_equal(output.dtype, out_dtype)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-2)
@pytest.mark.level0
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
@pytest.mark.skip(reason="No support yet")
def test_prod_default(context_mode):
"""
Feature: pyboost function.
Description: test function prod on ascend with default args.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4, 5), np.float32)
output = prod_forward_func(ms.Tensor(x))
expect = generate_expect_forward_output("prod", x)
np.testing.assert_equal(output.dtype, mstype.float32)
np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3)
x1 = np.arange(2 * 3).reshape(2, 3).astype(np.float32)
grads = prod_backward_func(ms.Tensor(x1))
expect = np.array([[120, 0, 0], [0, 0, 0]]).astype(np.float32)
np.testing.assert_equal(grads.dtype, mstype.float32)
np.testing.assert_allclose(grads.asnumpy(), expect, rtol=1e-3)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
@pytest.mark.skip(reason="No support yet")
def test_prod_dynamic(context_mode):
"""
Feature: pyboost function.
Description: test function prod with dynamic shape and rank.
Expectation: expect correct result.
"""
input1 = Tensor(generate_random_input((2, 3, 4, 5), np.float32))
axis1 = -1
input2 = Tensor(generate_random_input((3, 3, 4, 4), np.float32))
axis2 = -1
TEST_OP(prod_func, [[input1, axis1], [input2, axis2]], mode=context_mode, grad=True)
input3 = Tensor(generate_random_input((3, 4, 5), np.float32))
axis3 = 0
keep_dims3 = False
dtype3 = mstype.int32
input4 = Tensor(generate_random_input((3, 4), np.float32))
axis4 = 0
keep_dims4 = False
dtype4 = mstype.int64
TEST_OP(prod_func, [[input3, axis3, keep_dims3, dtype3], [input4, axis4, keep_dims4, dtype4]],
nontensor_dynamic_type='None', mode=context_mode, grad=True, test_resize=False)
input5 = Tensor(generate_random_input((2, 3, 4), np.float32))
input6 = Tensor(generate_random_input((2, 3), np.float32))
TEST_OP(prod_func, [[input5], [input6]], mode=context_mode, grad=True)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('axis', [-1, None])
@pytest.mark.parametrize('in_dtype', [mstype.float16])
@pytest.mark.parametrize('out_dtype', [mstype.float32, mstype.int8, mstype.uint8])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
@pytest.mark.skip(reason="No support yet")
def test_prod_valid_dtype(axis, in_dtype, out_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function prod forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4), mstype.dtype_to_nptype(in_dtype))
output = prod_forward_func(ms.Tensor(x), axis, False, out_dtype)
np.testing.assert_equal(output.dtype, out_dtype)
x1 = generate_random_input((3, 4, 5), mstype.dtype_to_nptype(in_dtype))
grads = prod_backward_func(ms.Tensor(x1), axis, False, out_dtype)
np.testing.assert_equal(grads.dtype, in_dtype)
@pytest.mark.level1
@pytest.mark.env_onecard
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.parametrize('axis', [-1, None])
@pytest.mark.parametrize('in_dtype', [mstype.int8, mstype.int16, mstype.int32, mstype.uint8])
@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE])
@pytest.mark.skip(reason="No support yet")
def test_prod_default_dtype(axis, in_dtype, context_mode):
"""
Feature: pyboost function.
Description: test function prod forward on ascend with different data types.
Expectation: expect correct result.
"""
ms.context.set_context(mode=context_mode)
x = generate_random_input((2, 3, 4), mstype.dtype_to_nptype(in_dtype))
output = prod_forward_func(ms.Tensor(x), axis, False, None)
np.testing.assert_equal(output.dtype, mstype.int64)
x1 = generate_random_input((3, 4, 5), mstype.dtype_to_nptype(in_dtype))
grads = prod_backward_func(ms.Tensor(x1), axis, False, None)
np.testing.assert_equal(grads.dtype, in_dtype)

View File

@ -0,0 +1,271 @@
/**
* Copyright 2024 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "common/common_test.h"
#include "ops/ops_func_impl/mean_ext.h"
#include "ops/ops_func_impl/sum_ext.h"
// #include "ops/ops_func_impl/prod_ext.h"
#include "ops/auto_generate/gen_ops_name.h"
#include "ops/test_ops.h"
#include "ops/test_ops_cmp_utils.h"
#include "ops/test_value_utils.h"
#include "abstract/abstract_value.h"
#include "abstract/ops/primitive_infer_map.h"
namespace mindspore {
namespace ops {
namespace {
struct ReduceExtandParams {
ShapeVector input_shape;
TypePtr input_type;
ShapeVector output_shape;
TypePtr output_type;
AbstractBasePtr axis;
AbstractBasePtr keep_dims;
AbstractBasePtr dtype;
};
static auto value_any = std::make_shared<abstract::AbstractScalar>(kValueAny, kTypeAny);
static auto value_none = std::make_shared<abstract::AbstractScalar>(kValueAny, kTypeNone);
static auto keep_dims_true = std::make_shared<BoolImm>(true)->ToAbstract();
static auto keep_dims_false = std::make_shared<BoolImm>(false)->ToAbstract();
static auto dtype_float64 = std::make_shared<Int64Imm>(kNumberTypeFloat64)->ToAbstract();
static auto dtype_int32 = std::make_shared<Int64Imm>(kNumberTypeInt32)->ToAbstract();
static auto dtype_int16 = std::make_shared<Int64Imm>(kNumberTypeInt16)->ToAbstract();
static auto dtype_int8 = std::make_shared<Int64Imm>(kNumberTypeInt8)->ToAbstract();
static auto dtype_uint8 = std::make_shared<Int64Imm>(kNumberTypeUInt8)->ToAbstract();
static auto dtype_bool = std::make_shared<Int64Imm>(kNumberTypeBool)->ToAbstract();
AbstractBasePtr CreateInt(const int &value) {
return CreatePyInt(value)->ToAbstract();
}
AbstractBasePtr CreateIntTuple(const std::vector<NumberContainer> &value) {
return CreatePyIntTuple(value)->ToAbstract();
}
template <typename T>
tensor::TensorPtr CreateTensor(const ShapeVector &shape, const TypeId &dtype, std::vector<T> value) {
void *data_ptr = &value[0];
auto tensor = std::make_shared<tensor::Tensor>(dtype, shape, data_ptr, dtype);
return tensor;
}
static std::map<std::string, OpFuncImplPtr> reduce_extand_func_impl = {
{kNameMeanExt, std::make_shared<MeanExtFuncImpl>()},
{kNameSumExt, std::make_shared<SumExtFuncImpl>()},
// {kNameProdExt, std::make_shared<ProdExtFuncImpl>()},
};
} // namespace
class TestReduceExtand : public TestOps, public testing::WithParamInterface<std::tuple<const char *, ReduceExtandParams>> {};
TEST_P(TestReduceExtand, dyn_shape) {
const auto &op_name = std::get<0>(GetParam());
const auto &param = std::get<1>(GetParam());
ASSERT_TRUE(reduce_extand_func_impl.find(op_name) != reduce_extand_func_impl.end());
auto op_impl = reduce_extand_func_impl[op_name];
ASSERT_NE(op_impl, nullptr);
auto prim = std::make_shared<Primitive>(op_name);
ASSERT_NE(prim, nullptr);
auto input = std::make_shared<abstract::AbstractTensor>(param.input_type, param.input_shape);
ASSERT_NE(input, nullptr);
auto input_args = std::vector<AbstractBasePtr>{input, param.axis, param.keep_dims, param.dtype};
auto expect_shape = std::make_shared<abstract::TensorShape>(param.output_shape);
ASSERT_NE(expect_shape, nullptr);
auto expect_type = std::make_shared<TensorType>(param.output_type);
ASSERT_NE(expect_type, nullptr);
auto out_shape = op_impl->InferShape(prim, input_args);
auto out_type = op_impl->InferType(prim, input_args);
ShapeCompare(out_shape, expect_shape);
TypeCompare(out_type, expect_type);
}
auto ReduceExtandTestCase = testing::ValuesIn(
{ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 1, 4}, kFloat32, CreateIntTuple({1}), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 4}, kFloat32, CreateIntTuple({1}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {1, 1, 4}, kFloat32, CreateIntTuple({0, 1}), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {4}, kFloat32, CreateIntTuple({0, 1}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 3, 1}, kFloat32, CreateIntTuple({-1}), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 4}, kFloat32, CreateIntTuple({-2}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 1, 1}, kFloat32, CreateIntTuple({-1, -2}), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {4}, kFloat32, CreateIntTuple({-2, -3}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1, 1, -1}, kFloat32, CreateIntTuple({kValueAny, 1}), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1}, kFloat32, CreateIntTuple({kValueAny, 1}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1, -1, -1}, kFloat32, CreateIntTuple({kValueAny, kValueAny}), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1}, kFloat32, CreateIntTuple({kValueAny, kValueAny}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1, -1, -1}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-2}, kFloat32, value_any, keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {}, kFloat32, CreateIntTuple({}), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-2}, kFloat32, CreateIntTuple({1}), value_any, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-2}, kFloat32, CreateIntTuple({1, 2}), value_any, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, 1, 4}, kFloat32, CreateIntTuple({1}), keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, 4}, kFloat32, CreateIntTuple({1}), keep_dims_false, value_none},
ReduceExtandParams{{-1, 3, 4}, kFloat32, {1, 3, 1}, kFloat32, CreateIntTuple({0, 2}), keep_dims_true, value_none},
ReduceExtandParams{{-1, 3, 4}, kFloat32, {3}, kFloat32, CreateIntTuple({0, 2}), keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, 1, -1}, kFloat32, CreateIntTuple({kValueAny, 1}), keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1}, kFloat32, CreateIntTuple({kValueAny, 1}), keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, -1, -1}, kFloat32, CreateIntTuple({kValueAny, kValueAny}), keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1}, kFloat32, CreateIntTuple({kValueAny, kValueAny}), keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, -1, -1}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-2}, kFloat32, value_any, keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {}, kFloat32, CreateIntTuple({}), keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-2}, kFloat32, CreateIntTuple({1}), value_any, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-2}, kFloat32, CreateIntTuple({1, 2}), value_any, value_none},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat32, CreateIntTuple({1}), keep_dims_true, value_none},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat32, CreateIntTuple({0, 2}), keep_dims_false, value_none},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat32, CreateIntTuple({kValueAny, 1}), keep_dims_true, value_none},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{-2}, kFloat32, {}, kFloat32, CreateIntTuple({}), keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, -1}, kFloat32, {1, 1, 1}, kFloat64, value_none, keep_dims_true, dtype_float64},
ReduceExtandParams{{-1, -1, -1}, kFloat32, {}, kFloat64, value_none, keep_dims_false, dtype_float64},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat64, value_none, keep_dims_true, dtype_float64},
ReduceExtandParams{{-2}, kFloat32, {}, kFloat64, value_none, keep_dims_false, dtype_float64},
ReduceExtandParams{{-1, -1, -1}, kFloat32, {-2}, kFloat64, value_none, value_any, dtype_float64},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat64, value_none, value_any, dtype_float64},
ReduceExtandParams{{}, kFloat32, {-2}, kFloat32, CreateIntTuple({0}), value_any, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, CreateIntTuple({0}), keep_dims_true, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, CreateIntTuple({0}), keep_dims_false, value_none},
ReduceExtandParams{{}, kFloat32, {-2}, kFloat32, value_any, value_any, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{}, kFloat32, {-2}, kFloat32, value_any, keep_dims_false, value_none}});
auto ReduceExtandTestCase_ProdExt = testing::ValuesIn(
{ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 1, 4}, kFloat32, CreateInt(1), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 4}, kFloat32, CreateInt(1), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 3, 1}, kFloat32, CreateInt(-1), keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {2, 4}, kFloat32, CreateInt(-2), keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1, -1, -1}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-1, -1}, kFloat32, value_any, keep_dims_false, value_none},
ReduceExtandParams{{2, 3, 4}, kFloat32, {-2}, kFloat32, CreateInt(1), value_any, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, 1, 4}, kFloat32, CreateInt(1), keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, 4}, kFloat32, CreateInt(1), keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, -1, -1}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-1, -1}, kFloat32, value_any, keep_dims_false, value_none},
ReduceExtandParams{{-1, -1, 4}, kFloat32, {-2}, kFloat32, CreateInt(1), value_any, value_none},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat32, CreateInt(1), keep_dims_true, value_none},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{-1, -1, -1}, kFloat32, {}, kFloat64, value_none, keep_dims_false, dtype_float64},
ReduceExtandParams{{-2}, kFloat32, {}, kFloat64, value_none, keep_dims_false, dtype_float64},
ReduceExtandParams{{-1, -1, -1}, kFloat32, {-2}, kFloat64, value_none, value_any, dtype_float64},
ReduceExtandParams{{-2}, kFloat32, {-2}, kFloat64, value_none, value_any, dtype_float64},
ReduceExtandParams{{}, kFloat32, {-2}, kFloat32, CreateInt(0), value_any, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, CreateInt(0), keep_dims_true, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, CreateInt(0), keep_dims_false, value_none},
ReduceExtandParams{{}, kFloat32, {-2}, kFloat32, value_any, value_any, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, value_any, keep_dims_true, value_none},
ReduceExtandParams{{}, kFloat32, {}, kFloat32, value_any, keep_dims_false, value_none}});
auto ReduceExtandTestCase_ExtraDtype = testing::ValuesIn(
{ReduceExtandParams{{}, kFloat32, {-2}, kFloat32, value_any, value_any, value_none},
ReduceExtandParams{{}, kComplex64, {-2}, kComplex64, value_any, value_any, value_none},
ReduceExtandParams{{}, kInt32, {-2}, kInt64, value_any, value_any, value_none},
ReduceExtandParams{{}, kInt16, {-2}, kInt64, value_any, value_any, value_none},
ReduceExtandParams{{}, kInt8, {-2}, kInt64, value_any, value_any, value_none},
ReduceExtandParams{{}, kUInt8, {-2}, kInt64, value_any, value_any, value_none},
ReduceExtandParams{{}, kBool, {-2}, kInt64, value_any, value_any, value_none},
ReduceExtandParams{{}, kInt32, {-2}, kBool, value_any, value_any, dtype_bool},
ReduceExtandParams{{}, kInt16, {-2}, kUInt8, value_any, value_any, dtype_uint8},
ReduceExtandParams{{}, kInt8, {-2}, kBool, value_any, value_any, dtype_bool},
ReduceExtandParams{{}, kUInt8, {-2}, kInt16, value_any, value_any, dtype_int16},
ReduceExtandParams{{}, kBool, {-2}, kInt32, value_any, value_any, dtype_int32}});
INSTANTIATE_TEST_CASE_P(TestMeanExtGroup, TestReduceExtand,
testing::Combine(testing::ValuesIn({kNameMeanExt}), ReduceExtandTestCase));
INSTANTIATE_TEST_CASE_P(TestSumExtGroup, TestReduceExtand,
testing::Combine(testing::ValuesIn({kNameSumExt}), ReduceExtandTestCase));
INSTANTIATE_TEST_CASE_P(TestSumExtGroup_ExtraDtype, TestReduceExtand,
testing::Combine(testing::ValuesIn({kNameSumExt}), ReduceExtandTestCase_ExtraDtype));
// INSTANTIATE_TEST_CASE_P(TestProdExtGroup, TestReduceExtand,
// testing::Combine(testing::ValuesIn({kNameProdExt}), ReduceExtandTestCase_ProdExt));
// INSTANTIATE_TEST_CASE_P(TestProdExtGroup_ExtraDtype, TestReduceExtand,
// testing::Combine(testing::ValuesIn({kNameProdExt}), ReduceExtandTestCase_ExtraDtype));
struct ReduceExtandInferValueParams {
tensor::TensorPtr input;
AbstractBasePtr axis;
AbstractBasePtr keep_dims;
AbstractBasePtr dtype;
tensor::TensorPtr out;
};
class TestReduceExtandInferValue : public TestOps, public testing::WithParamInterface<std::tuple<const char *, ReduceExtandInferValueParams>> {};
TEST_P(TestReduceExtandInferValue, dyn_shape_infer_value) {
const auto &op_name = std::get<0>(GetParam());
const auto &param = std::get<1>(GetParam());
auto primitive = std::make_shared<Primitive>(op_name);
ASSERT_NE(primitive, nullptr);
ASSERT_NE(param.input, nullptr);
auto input = param.input->ToAbstract();
ASSERT_NE(input, nullptr);
auto input_args = abstract::AbstractBasePtrList{input, param.axis, param.keep_dims, param.dtype};
auto value_opt = abstract::InferValueByFuncImpl(primitive, input_args);
if (!value_opt.has_value()) {
MS_LOG(ERROR) << op_name << " have no infer value implement!";
ASSERT_TRUE(false);
}
auto infer_out = value_opt.value();
if (infer_out == nullptr) {
MS_LOG(ERROR) << op_name << " can not infer value with inputs: " << input_args;
ASSERT_TRUE(false);
}
auto infer_tensor = infer_out->cast<tensor::TensorPtr>();
ASSERT_NE(infer_tensor, nullptr);
ASSERT_TRUE(infer_tensor->ValueEqual(*param.out));
}
auto ReduceExtandInferValueTestCase_MeanExt = testing::ValuesIn(
{ReduceExtandInferValueParams{
CreateTensor<float>(ShapeVector{2, 2}, kNumberTypeFloat32, std::vector<float>{2, 3, 4, 5}),
CreateIntTuple({0}), keep_dims_false, dtype_float64,
CreateTensor<double>(ShapeVector{2}, kNumberTypeFloat64, std::vector<double>{3, 4})},
ReduceExtandInferValueParams{
CreateTensor<float>(ShapeVector{2, 2}, kNumberTypeFloat32, std::vector<float>{2, 3, 4, 5}),
value_none, keep_dims_false, dtype_float64,
CreateTensor<double>(ShapeVector{}, kNumberTypeFloat64, std::vector<double>{3.5})},
ReduceExtandInferValueParams{
CreateTensor<float>(ShapeVector{2, 2}, kNumberTypeFloat32, std::vector<float>{2, 3, 4, 5}),
CreateIntTuple({0}), keep_dims_true, value_none,
CreateTensor<float>(ShapeVector{1, 2}, kNumberTypeFloat32, std::vector<float>{3, 4})}});
auto ReduceExtandInferValueTestCase_SumExt = testing::ValuesIn(
{ReduceExtandInferValueParams{
CreateTensor<float>(ShapeVector{2, 2}, kNumberTypeFloat32, std::vector<float>{2, 3, 4, 5}),
CreateIntTuple({0}), keep_dims_false, dtype_float64,
CreateTensor<double>(ShapeVector{2}, kNumberTypeFloat64, std::vector<double>{6, 8})},
ReduceExtandInferValueParams{
CreateTensor<float>(ShapeVector{2, 2}, kNumberTypeFloat32, std::vector<float>{2, 3, 4, 5}),
value_none, keep_dims_false, dtype_float64,
CreateTensor<double>(ShapeVector{}, kNumberTypeFloat64, std::vector<double>{14})},
ReduceExtandInferValueParams{
CreateTensor<float>(ShapeVector{2, 2}, kNumberTypeFloat32, std::vector<float>{2, 3, 4, 5}),
CreateIntTuple({0}), keep_dims_true, value_none,
CreateTensor<float>(ShapeVector{1, 2}, kNumberTypeFloat32, std::vector<float>{6, 8})}});
INSTANTIATE_TEST_CASE_P(TestMeanExtInferValueGroup, TestReduceExtandInferValue,
testing::Combine(testing::ValuesIn({kNameMeanExt}), ReduceExtandInferValueTestCase_MeanExt));
INSTANTIATE_TEST_CASE_P(TestSumExtInferValueGroup, TestReduceExtandInferValue,
testing::Combine(testing::ValuesIn({kNameSumExt}), ReduceExtandInferValueTestCase_SumExt));
} // namespace ops
} // namespace mindspore

View File

@ -0,0 +1,21 @@
# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""setup for pytest"""
import mindspore.context as context
# pylint: disable=unused-argument
def setup_module(module):
context.set_context(mode=context.GRAPH_MODE)

View File

@ -75,20 +75,6 @@ def test_fallback_sum_with_x_tensor_n_default():
assert out.asnumpy() == 6
def test_fallback_sum_with_x_tensor_n_default_2():
"""
Feature: JIT Fallback
Description: Test sum() in graph mode with input x tensor and input n default.
Expectation: No exception.
"""
@jit
def foo():
x = sum(Tensor([[1, 1], [2, 2]]))
return x
out = foo()
assert np.allclose(out.asnumpy(), np.array([3, 3]))
def test_fallback_sum_with_x_numpy_array_n_default_2():
"""
Feature: JIT Fallback