!43978 Fix doc && bug of CumSum.

Merge pull request !43978 from hezhenhao1/add_cumsum
2022-10-18 02:26:10 +00:00 · 2022-10-18 02:26:10 +00:00 · 792c41d95c
parent c7d2f077b0 12066b2895
commit 792c41d95c
10 changed files with 111 additions and 5 deletions
--- a/docs/api/api_python/mindspore.ops.function.rst
+++ b/docs/api/api_python/mindspore.ops.function.rst
@ -208,6 +208,7 @@ Reduction函数
    mindspore.ops.argmin
    mindspore.ops.cummax
    mindspore.ops.cummin
+    mindspore.ops.cumsum
    mindspore.ops.logsumexp
    mindspore.ops.max
    mindspore.ops.mean
--- a/docs/api/api_python/ops/mindspore.ops.func_cumsum.rst
+++ b/docs/api/api_python/ops/mindspore.ops.func_cumsum.rst
@ -0,0 +1,24 @@
+mindspore.ops.cumsum
+====================
+
+.. py:function:: mindspore.ops.cumsum(x, axis, dtype=None)
+
+    计算输入张量 `x` 沿维度 `axis` 的累积和。
+
+    .. math::
+        y_i = x_1 + x_2 + x_3 + ... + x_i
+
+    .. note::
+        目前Ascend平台上，对于静态shape的场景， `x` 的数据类型暂仅支持：int8、uint8、int32、float32和float16；对于动态shape的场景， `x` 的数据类型暂仅支持：int32、float32和float16。
+
+    参数：
+        - **x** (Tensor) - 输入要累积和的Tensor。
+        - **axis** (int) - 累积和计算的维度。
+        - **dtype** (:class:`mindspore.dtype`, optional) - 输出数据类型。如果不为None，则输入会转化为 `dtype`。这有利于防止数值溢出。如果为None，则输出和输入的数据类型一致。默认值：None。
+
+    返回：
+        Tensor，和输入Tensor的形状相同。
+
+    异常：
+        - **TypeError** - 如果 `x` 不是Tensor。
+        - **ValueError** - 如果 `axis` 超出范围。
--- a/docs/api/api_python_en/mindspore.ops.function.rst
+++ b/docs/api/api_python_en/mindspore.ops.function.rst
@ -208,6 +208,7 @@ Reduction Functions
    mindspore.ops.argmin
    mindspore.ops.cummax
    mindspore.ops.cummin
+    mindspore.ops.cumsum
    mindspore.ops.logsumexp
    mindspore.ops.max
    mindspore.ops.mean
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cumsum_impl.cu
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cumsum_impl.cu
@ -178,3 +178,11 @@ template CUDA_LIB_EXPORT void CumSum<float>(const float *input, float *output, f
 template CUDA_LIB_EXPORT void CumSum<half>(const half *input, half *output, half *workspace, size_t dim0, size_t dim1,
                                           size_t dim2, size_t stride, size_t stride2, bool exclusive_, bool reverse_,
                                           const uint32_t &device_id, cudaStream_t stream);
+// Explicit instantiations of CumSum for the complex element types Complex<float> and Complex<double>.
+template CUDA_LIB_EXPORT void CumSum<Complex<float>>(const Complex<float> *input, Complex<float> *output,
+                                                     Complex<float> *workspace, size_t dim0, size_t dim1, size_t dim2,
+                                                     size_t stride, size_t stride2, bool exclusive_, bool reverse_,
+                                                     const uint32_t &device_id, cudaStream_t stream);
+template CUDA_LIB_EXPORT void CumSum<Complex<double>>(const Complex<double> *input, Complex<double> *output,
+                                                      Complex<double> *workspace, size_t dim0, size_t dim1, size_t dim2,
+                                                      size_t stride, size_t stride2, bool exclusive_, bool reverse_,
+                                                      const uint32_t &device_id, cudaStream_t stream);
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cumsum_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cumsum_impl.cuh
@ -17,6 +17,7 @@
 #ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUMSUM_IMPL_CUH_
 #define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUMSUM_IMPL_CUH_
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
 template <typename T>
 CUDA_LIB_EXPORT void CumSum(const T *input, T *output, T *workspace, size_t dim0, size_t dim1, size_t dim2,
                            size_t stride, size_t stride2, bool exclusive_, bool reverse_, const uint32_t &device_id,
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumsum_gpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cumsum_gpu_kernel.cc
@ -150,6 +150,10 @@ std::vector<std::pair<KernelAttr, CumSumGpuKernelMod::CumSumLaunchFunc>> CumSumG
   &CumSumGpuKernelMod::LaunchKernel<float>},
  {KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
   &CumSumGpuKernelMod::LaunchKernel<double>},
+  {KernelAttr().AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
+   &CumSumGpuKernelMod::LaunchKernel<utils::Complex<float>>},
+  {KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
+   &CumSumGpuKernelMod::LaunchKernel<utils::Complex<double>>},
  // Dynamic shape related.
  {KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt8),
   &CumSumGpuKernelMod::LaunchKernel<int8_t>},
@ -173,6 +177,10 @@ std::vector<std::pair<KernelAttr, CumSumGpuKernelMod::CumSumLaunchFunc>> CumSumG
   &CumSumGpuKernelMod::LaunchKernel<float>},
  {KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
   &CumSumGpuKernelMod::LaunchKernel<double>},
+  {KernelAttr().AddInputAttr(kNumberTypeComplex64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeComplex64),
+   &CumSumGpuKernelMod::LaunchKernel<utils::Complex<float>>},
+  {KernelAttr().AddInputAttr(kNumberTypeComplex128).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeComplex128),
+   &CumSumGpuKernelMod::LaunchKernel<utils::Complex<double>>},
  {KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt8),
   &CumSumGpuKernelMod::LaunchKernel<int8_t>},
  {KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt16),
@ -195,6 +203,10 @@ std::vector<std::pair<KernelAttr, CumSumGpuKernelMod::CumSumLaunchFunc>> CumSumG
   &CumSumGpuKernelMod::LaunchKernel<float>},
  {KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64),
   &CumSumGpuKernelMod::LaunchKernel<double>},
+  {KernelAttr().AddInputAttr(kNumberTypeComplex64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeComplex64),
+   &CumSumGpuKernelMod::LaunchKernel<utils::Complex<float>>},
+  {KernelAttr().AddInputAttr(kNumberTypeComplex128).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeComplex128),
+   &CumSumGpuKernelMod::LaunchKernel<utils::Complex<double>>},
 };

 std::vector<KernelAttr> CumSumGpuKernelMod::GetOpSupport() {
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/cuda_impl/CMakeLists.txt
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/cuda_impl/CMakeLists.txt
@ -1,5 +1,6 @@
 # cuda
 find_package(CUDA)
+add_compile_definitions(ENABLE_GPU)
 file(GLOB_RECURSE CUDA_KERNEL_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/*.cu
    ${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/gather.cu
@ -10,6 +11,6 @@ file(GLOB_RECURSE CUDA_KERNEL_SRC
    )

 set_source_files_properties(${CUDA_KERNEL_SRC} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGES} -std=c++14 -fPIC")
-SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++14;-arch=sm_53)
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")  # append -fPIC to the inherited flags (was the undefined CMAKE_CXX_FLAGES)
+SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_53)
 cuda_add_library(cuda_kernel_mid STATIC ${CUDA_KERNEL_SRC})
--- a/mindspore/python/mindspore/ops/function/init.py
+++ b/mindspore/python/mindspore/ops/function/init.py
@ -255,6 +255,7 @@ from .math_func import (
    baddbmm,
    cummin,
    cummax,
+    cumsum,
    amin,
    amax,
    mean,
--- a/mindspore/python/mindspore/ops/function/math_func.py
+++ b/mindspore/python/mindspore/ops/function/math_func.py
@ -163,6 +163,7 @@ sparse_segment_mean_ = SparseSegmentMean()
 xlogy_ = P.Xlogy()
 square_ = P.Square()
 sqrt_ = P.Sqrt()
+cumsum_ = P.CumSum()


 #####################################
@ -4253,6 +4254,62 @@ def cummax(x, axis):
    return _cummax(x)


+def cumsum(x, axis, dtype=None):
+    """
+    Computes the cumulative sum of the input Tensor `x` along `axis`.
+
+    .. math::
+
+        y_i = x_1 + x_2 + x_3 + ... + x_i
+
+    Note:
+        On Ascend, the dtype of `x` only supports int8, uint8, int32, float16 or float32 in the case of static shape.
+        In the case of dynamic shape, the dtype of `x` only supports int32, float16 or float32.
+
+    Args:
+        x (Tensor): The input Tensor to accumulate.
+        axis (int): Axis along which the cumulative sum is computed.
+        dtype (:class:`mindspore.dtype`, optional): The desired dtype of returned Tensor. If specified,
+            the input Tensor will be cast to `dtype` before the computation. This is useful for preventing overflows.
+            If not specified, the dtype of the input Tensor is kept. Default: None.
+
+    Returns:
+        Tensor, the shape of the output Tensor is consistent with the input Tensor's.
+
+    Raises:
+        TypeError: If `x` is not a Tensor.
+        ValueError: If `axis` is out of range.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU`` ``CPU``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor
+        >>> import mindspore.ops as ops
+        >>> x = Tensor(np.array([[3, 4, 6, 10], [1, 6, 7, 9], [4, 3, 8, 7], [1, 3, 7, 9]]).astype(np.float32))
+        >>> # case 1: along the axis 0
+        >>> y = ops.cumsum(x, 0)
+        >>> print(y)
+        [[ 3.  4.  6. 10.]
+         [ 4. 10. 13. 19.]
+         [ 8. 13. 21. 26.]
+         [ 9. 16. 28. 35.]]
+        >>> # case 2: along the axis 1
+        >>> y = ops.cumsum(x, 1)
+        >>> print(y)
+        [[ 3.  7. 13. 23.]
+         [ 1.  7. 14. 23.]
+         [ 4.  7. 15. 22.]
+         [ 1.  4. 11. 20.]]
+    """
+    if dtype is not None and x.dtype != dtype:
+        x = x.astype(dtype, copy=False)
+    validator.check_axis_in_range(axis, x.ndim)
+    return cumsum_(x, axis)
+
+
 def sparse_segment_mean(x, indices, segment_ids):
    r"""
    Computes a Tensor such that :math:`output_i = \frac{\sum_j x_{indices[j]}}{N}` where mean is over :math:`j` such
@ -6069,7 +6126,7 @@ def dotrapezoid(y, dx, dim):
    y_left = select_(y, dim, 0)
    y_right = select_(y, dim, -1)
    y_sum = y.sum(dim)
-    return (y_sum - (y_left  + y_right) * 0.5) * dx
+    return (y_sum - (y_left + y_right) * 0.5) * dx


 def dotrapezoid_tensor(y, dx, dim):
@ -6096,7 +6153,7 @@ def add_padding_to_shape(curr_shape, target_n_dim):
 def zeros_like_except(y, dim):
    _check_dim_in_range(dim, y.ndim)
    dim = dim + y.ndim if dim < 0 else dim
-    sizes = y.shape[:dim] + y.shape[dim+1:]
+    sizes = y.shape[:dim] + y.shape[dim + 1:]
    zeros = P.Zeros()(sizes, y.dtype)
    return zeros

@ -6587,6 +6644,7 @@ __all__ = [
    'baddbmm',
    'cummin',
    'cummax',
+    'cumsum',
    'amin',
    'amax',
    'mean',
--- a/mindspore/python/mindspore/ops/functional.py
+++ b/mindspore/python/mindspore/ops/functional.py
@ -300,7 +300,6 @@ bool_not = Primitive("bool_not")
 bool_or = Primitive("bool_or")
 bool_and = Primitive("bool_and")
 bool_eq = Primitive("bool_eq")
-cumsum = P.CumSum()
 cumprod = P.CumProd()
 array_to_scalar = Primitive('array_to_scalar')
 is_ = Primitive("is_")