!38400 Add chinese desc and fix implicit problem && Optimize perf
Merge pull request !38400 from zhangzhaoju/ms_master
This commit is contained in:
commit
aac6a587f4
|
@ -1968,3 +1968,28 @@ mindspore.Tensor
|
|||
- **TypeError** - 如果 `x` 和 `y` 不是数值型、bool或Tensor。
|
||||
- **TypeError** - 如果 `x` 和 `y` 的数据类型不是float16、float32或float64。
|
||||
- **ValueError** - 如果 `x` 不能广播到与 `y` 的shape一致。
|
||||
|
||||
.. py:method:: xdivy(y)
|
||||
|
||||
计算原Tensor除以输入的Tensor。当原Tensor为零时,则返回零。原Tensor的数据类型需要是float,complex或bool。
|
||||
后面为了使表达清晰,使用 `x` 代替原Tensor。
|
||||
|
||||
.. math::
|
||||
out_i = x_{i} / y_{i}
|
||||
|
||||
`x` 和 `y` 的输入遵循隐式类型转换规则,使数据类型一致。y是一个Tensor或Scalar。当y是Tensor时,x和y的数据类型不能同时是bool的,它们的shape可以广播。当y是Scalar时,只能是一个常量。
|
||||
|
||||
**参数:**
|
||||
|
||||
- **y** (Union[Tensor, number.Number, bool]) - float、complex或bool类型的Tensor。`x` 和 `y` 不能同时为bool类型。
|
||||
|
||||
**返回:**
|
||||
|
||||
Tensor,shape与广播后的shape相同,数据类型为两个输入中精度较高或数值较高的类型。
|
||||
|
||||
**异常:**
|
||||
|
||||
- **TypeError** - 如果 `y` 不是以下之一:Tensor、Number、bool。
|
||||
- **TypeError** - 如果 `x` 和 `y` 的数据类型不是float16、float32、float64、complex64、complex128、bool。
|
||||
- **ValueError** - 如果 `x` 不能广播至 `y` 的shape。
|
||||
- **RuntimeError** - 如果Parameter的 `x` 、 `y` 需要进行数据类型转换,但是Parameter不支持数据类型转换。
|
||||
|
|
|
@ -53,14 +53,10 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
|
|||
auto y_addr = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
size_t output_size = outputs[0]->size / sizeof(T);
|
||||
BroadcastIterator base_iter(x_shape_, y_shape_, out_shape_);
|
||||
auto task = [&x_addr, &y_addr, &output_addr, &base_iter](size_t start, size_t end) {
|
||||
auto iter = base_iter;
|
||||
iter.SetPos(start);
|
||||
auto sameShapeTask = [&x_addr, &y_addr, &output_addr](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
auto dividend = x_addr[iter.GetInputPosA()];
|
||||
auto divisor = y_addr[iter.GetInputPosB()];
|
||||
iter.GenNextPos();
|
||||
auto dividend = x_addr[i];
|
||||
auto divisor = y_addr[i];
|
||||
auto zero = (T)0;
|
||||
if (divisor == zero) {
|
||||
if (dividend == zero) {
|
||||
|
@ -73,7 +69,30 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
|
|||
output_addr[i] = dividend / divisor;
|
||||
}
|
||||
};
|
||||
ParallelLaunchAutoSearch(task, output_size, this, ¶llel_search_info_);
|
||||
auto diffShapeTask = [this, &x_addr, &y_addr, &output_addr](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
auto idxX = index_listx_[i];
|
||||
auto idxY = index_listy_[i];
|
||||
auto dividend = x_addr[idxX];
|
||||
auto divisor = y_addr[idxY];
|
||||
auto zero = (T)0;
|
||||
if (divisor == zero) {
|
||||
if (dividend == zero) {
|
||||
output_addr[i] = zero;
|
||||
continue;
|
||||
}
|
||||
output_addr[i] = GetDivZeroVal(dividend);
|
||||
continue;
|
||||
}
|
||||
output_addr[i] = dividend / divisor;
|
||||
}
|
||||
};
|
||||
|
||||
if (is_need_broadcast_) {
|
||||
ParallelLaunch(diffShapeTask, output_size, 0, this, pool_);
|
||||
} else {
|
||||
ParallelLaunch(sameShapeTask, output_size, 0, this, pool_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -111,6 +130,68 @@ bool XdivyCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::ve
|
|||
return true;
|
||||
}
|
||||
|
||||
void GetBroadCastIndex(const ShapeVector &unaligned_input_shape, const ShapeVector &output_shape,
|
||||
std::vector<int64_t> *index_list) {
|
||||
// Given unaligned input shape and output shape, this function returns the mapping
|
||||
// from indices of output (logical) to corespondingly real input indices (physical).
|
||||
// The return will write to index_list, whose size is equal to total elements of output.
|
||||
constexpr int MaxDim = 10;
|
||||
int64_t logical_shape[MaxDim];
|
||||
int64_t physical_shape[MaxDim];
|
||||
int64_t size = 0, output_size = 1;
|
||||
// Align input shape to output shape by filling one into the outermost dimension.
|
||||
ShapeVector input_shape(output_shape.size());
|
||||
for (size_t i = 0, j = output_shape.size() - unaligned_input_shape.size(); i < output_shape.size(); i++) {
|
||||
input_shape[i] = i < j ? 1 : unaligned_input_shape[i - j];
|
||||
}
|
||||
// Get logical shape and physical shape of input. Moreover, we will merge the dimensions with same
|
||||
// (logical or physical) property.
|
||||
for (int i = SizeToInt(output_shape.size()) - 1; i >= 0;) {
|
||||
int64_t stride = 1;
|
||||
bool change = false, is_valid = false;
|
||||
while (i >= 0 && input_shape[i] == output_shape[i]) {
|
||||
stride *= output_shape[i];
|
||||
change = is_valid = true;
|
||||
--i;
|
||||
}
|
||||
if (change) {
|
||||
output_size *= stride;
|
||||
logical_shape[size] = physical_shape[size] = stride;
|
||||
size++;
|
||||
}
|
||||
change = false;
|
||||
stride = 1;
|
||||
while (i >= 0 && input_shape[i] == 1) {
|
||||
stride *= output_shape[i];
|
||||
change = is_valid = true;
|
||||
--i;
|
||||
}
|
||||
if (change) {
|
||||
output_size *= stride;
|
||||
logical_shape[size] = 1;
|
||||
physical_shape[size] = stride;
|
||||
size++;
|
||||
}
|
||||
if (!is_valid) {
|
||||
MS_LOG(EXCEPTION) << "Both shape are not able to broadcast, input shape is " << unaligned_input_shape
|
||||
<< " and output shape is " << output_shape;
|
||||
}
|
||||
}
|
||||
// Get the flatten input indices according to "logical_shape" and "physical_shape".
|
||||
int64_t offset = 1;
|
||||
int64_t stride = 1;
|
||||
index_list->resize(output_size);
|
||||
(*index_list)[0] = 0; // First element is set to 0.
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
int64_t increment = (logical_shape[i] == physical_shape[i] ? stride : 0);
|
||||
for (int64_t j = 0; j < (physical_shape[i] - 1) * offset; ++j) {
|
||||
(*index_list)[offset + j] = (*index_list)[j] + increment;
|
||||
}
|
||||
offset *= physical_shape[i];
|
||||
stride *= logical_shape[i];
|
||||
}
|
||||
}
|
||||
|
||||
int XdivyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &) {
|
||||
|
@ -122,19 +203,13 @@ int XdivyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::v
|
|||
return ret;
|
||||
}
|
||||
|
||||
x_shape_ = inputs[0]->GetShapeVector();
|
||||
y_shape_ = inputs[1]->GetShapeVector();
|
||||
out_shape_ = outputs[0]->GetShapeVector();
|
||||
if (out_shape_.empty()) {
|
||||
out_shape_.emplace_back(1);
|
||||
}
|
||||
auto x_shape_len = x_shape_.size();
|
||||
for (size_t i = 0; i < out_shape_.size() - x_shape_len; ++i) {
|
||||
(void)x_shape_.insert(x_shape_.begin(), 1);
|
||||
}
|
||||
auto y_shape_len = y_shape_.size();
|
||||
for (size_t i = 0; i < out_shape_.size() - y_shape_len; ++i) {
|
||||
(void)y_shape_.insert(y_shape_.begin(), 1);
|
||||
auto x_shape = inputs[0]->GetShapeVector();
|
||||
auto y_shape = inputs[1]->GetShapeVector();
|
||||
auto out_shape = outputs[0]->GetShapeVector();
|
||||
is_need_broadcast_ = x_shape != y_shape;
|
||||
if (is_need_broadcast_) {
|
||||
GetBroadCastIndex(x_shape, out_shape, &index_listx_);
|
||||
GetBroadCastIndex(y_shape, out_shape, &index_listy_);
|
||||
}
|
||||
return KRET_OK;
|
||||
}
|
||||
|
|
|
@ -48,9 +48,8 @@ class XdivyCpuKernelMod : public NativeCpuKernelMod {
|
|||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
x_shape_.clear();
|
||||
y_shape_.clear();
|
||||
out_shape_.clear();
|
||||
index_listx_.clear();
|
||||
index_listy_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -63,9 +62,10 @@ class XdivyCpuKernelMod : public NativeCpuKernelMod {
|
|||
static std::vector<KernelAttr> support_ops_;
|
||||
static std::map<mindspore::TypeId, XdivyFunc> func_map_;
|
||||
XdivyFunc kernel_func_;
|
||||
ShapeVector x_shape_;
|
||||
ShapeVector y_shape_;
|
||||
ShapeVector out_shape_;
|
||||
// Broadcast related.
|
||||
std::vector<int64_t> index_listx_{};
|
||||
std::vector<int64_t> index_listy_{};
|
||||
bool is_need_broadcast_{false};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -2131,7 +2131,7 @@ def xdivy(x, y):
|
|||
r"""
|
||||
Divides the first input tensor by the second input tensor element-wise. Returns zero when `x` is zero.
|
||||
"""
|
||||
return F.tensor_xdivy(x, y)
|
||||
return F.xdivy(x, y)
|
||||
|
||||
|
||||
def int_bool(x):
|
||||
|
|
|
@ -146,7 +146,6 @@ from .math_func import (
|
|||
tensor_floordiv,
|
||||
floor_div,
|
||||
floordiv,
|
||||
tensor_xdivy,
|
||||
xdivy,
|
||||
tensor_pow,
|
||||
pow,
|
||||
|
|
|
@ -80,7 +80,7 @@ tensor_mul = P.Mul()
|
|||
tensor_div = P.RealDiv()
|
||||
tensor_floordiv = P.FloorDiv()
|
||||
floordiv = tensor_floordiv
|
||||
tensor_xdivy = P.Xdivy()
|
||||
xdivy_ = P.Xdivy()
|
||||
tensor_pow = P.Pow()
|
||||
pows = tensor_pow
|
||||
tensor_mod = P.FloorMod()
|
||||
|
@ -4665,7 +4665,7 @@ def xdivy(x, y):
|
|||
>>> print(output)
|
||||
[ 1. 2. -0.5]
|
||||
"""
|
||||
return tensor_xdivy(x, y)
|
||||
return xdivy_(x, y)
|
||||
|
||||
|
||||
def log10(x):
|
||||
|
@ -4972,7 +4972,6 @@ __all__ = [
|
|||
'tensor_floordiv',
|
||||
'floor_div',
|
||||
'floordiv',
|
||||
'tensor_xdivy',
|
||||
'xdivy',
|
||||
'tensor_pow',
|
||||
'pow',
|
||||
|
|
|
@ -3285,8 +3285,8 @@ class Xdivy(Primitive):
|
|||
|
||||
# Let x/y using same sig_dtype to enable implicit conversion for compatibility
|
||||
__mindspore_signature__ = (
|
||||
sig.make_sig('x', dtype=sig.sig_dtype.T),
|
||||
sig.make_sig('y', dtype=sig.sig_dtype.T)
|
||||
sig.make_sig('x', rw=sig.sig_rw.RW_READ, dtype=sig.sig_dtype.T),
|
||||
sig.make_sig('y', rw=sig.sig_rw.RW_READ, dtype=sig.sig_dtype.T)
|
||||
)
|
||||
|
||||
@prim_attr_register
|
||||
|
|
Loading…
Reference in New Issue