!38400 Add Chinese desc and fix implicit problem && Optimize perf

Merge pull request !38400 from zhangzhaoju/ms_master
i-robot 2022-07-20 06:28:40 +00:00 committed by Gitee
commit aac6a587f4
7 changed files with 132 additions and 34 deletions


@@ -1968,3 +1968,28 @@ mindspore.Tensor
     - **TypeError** - If `x` and `y` are not one of the following: number, bool, Tensor.
     - **TypeError** - If the data types of `x` and `y` are not float16, float32, or float64.
     - **ValueError** - If `x` cannot be broadcast to the shape of `y`.
+.. py:method:: xdivy(y)
+
+    Divides the original Tensor by the input Tensor element-wise; returns zero when the original Tensor is zero. The data type of the original Tensor must be float, complex, or bool.
+    For clarity, `x` is used below to denote the original Tensor.
+
+    .. math::
+
+        out_{i} = \frac{x_{i}}{y_{i}}
+
+    The inputs `x` and `y` follow implicit type conversion rules to make their data types consistent. `y` is a Tensor or a scalar. When `y` is a Tensor, the data types of `x` and `y` cannot both be bool, and their shapes can be broadcast. When `y` is a scalar, it can only be a constant.
+
+    **Parameters:**
+
+    - **y** (Union[Tensor, number.Number, bool]) - A Tensor of float, complex, or bool type. `x` and `y` cannot both be bool.
+
+    **Returns:**
+
+    Tensor, with the same shape as the one after broadcasting, and with the data type of the input that has the higher precision or the higher digits.
+
+    **Raises:**
+
+    - **TypeError** - If `y` is not one of the following: Tensor, Number, bool.
+    - **TypeError** - If the data types of `x` and `y` are not float16, float32, float64, complex64, complex128, or bool.
+    - **ValueError** - If `x` cannot be broadcast to the shape of `y`.
+    - **RuntimeError** - If `x` and `y` are Parameters that require data type conversion, but Parameter does not support data type conversion.
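To make the documented zero-handling rule concrete, here is a minimal usage sketch (the input values are illustrative, not taken from the patch): an element where the original Tensor is zero yields zero even when the divisor is also zero.

```python
import numpy as np
import mindspore
from mindspore import Tensor

x = Tensor(np.array([0, 4, -1]), mindspore.float32)
y = Tensor(np.array([0, 2, 2]), mindspore.float32)
# Per the doc: 0/0 -> 0; other elements are plain element-wise division.
print(x.xdivy(y))  # expected: [ 0.   2.  -0.5]
```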


@@ -53,14 +53,10 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
   auto y_addr = reinterpret_cast<T *>(inputs[1]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
   size_t output_size = outputs[0]->size / sizeof(T);
-  BroadcastIterator base_iter(x_shape_, y_shape_, out_shape_);
-  auto task = [&x_addr, &y_addr, &output_addr, &base_iter](size_t start, size_t end) {
-    auto iter = base_iter;
-    iter.SetPos(start);
+  auto sameShapeTask = [&x_addr, &y_addr, &output_addr](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      auto dividend = x_addr[iter.GetInputPosA()];
-      auto divisor = y_addr[iter.GetInputPosB()];
-      iter.GenNextPos();
+      auto dividend = x_addr[i];
+      auto divisor = y_addr[i];
       auto zero = (T)0;
       if (divisor == zero) {
         if (dividend == zero) {
@@ -73,7 +69,30 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
       output_addr[i] = dividend / divisor;
     }
   };
-  ParallelLaunchAutoSearch(task, output_size, this, &parallel_search_info_);
+  auto diffShapeTask = [this, &x_addr, &y_addr, &output_addr](size_t start, size_t end) {
+    for (size_t i = start; i < end; i++) {
+      auto idxX = index_listx_[i];
+      auto idxY = index_listy_[i];
+      auto dividend = x_addr[idxX];
+      auto divisor = y_addr[idxY];
+      auto zero = (T)0;
+      if (divisor == zero) {
+        if (dividend == zero) {
+          output_addr[i] = zero;
+          continue;
+        }
+        output_addr[i] = GetDivZeroVal(dividend);
+        continue;
+      }
+      output_addr[i] = dividend / divisor;
+    }
+  };
+  if (is_need_broadcast_) {
+    ParallelLaunch(diffShapeTask, output_size, 0, this, pool_);
+  } else {
+    ParallelLaunch(sameShapeTask, output_size, 0, this, pool_);
+  }
   return true;
 }
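The kernel now takes one of two paths: sameShapeTask indexes both inputs with the flat output index, while diffShapeTask reads through the precomputed index_listx_/index_listy_ tables. Both share the same zero-handling rule, restated below as a NumPy reference sketch (an assumption of this note, not code from the patch; it presumes GetDivZeroVal yields an infinity for a nonzero dividend over zero):

```python
import numpy as np

def xdivy_reference(x, y):
    """Reference model of the kernel's element-wise rule: 0 wherever the
    dividend is 0 (including 0/0), plain division elsewhere."""
    x, y = np.broadcast_arrays(np.asarray(x, dtype=np.float32),
                               np.asarray(y, dtype=np.float32))
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.where(x == 0, np.float32(0), x / y)

print(xdivy_reference([0.0, 4.0, -1.0], [0.0, 2.0, 2.0]))  # [ 0.   2.  -0.5]
```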
@@ -111,6 +130,68 @@ bool XdivyCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::ve
   return true;
 }
+
+void GetBroadCastIndex(const ShapeVector &unaligned_input_shape, const ShapeVector &output_shape,
+                       std::vector<int64_t> *index_list) {
+  // Given an unaligned input shape and an output shape, this function computes the mapping from
+  // each (logical) output index to the corresponding real (physical) input index.
+  // The result is written to index_list, whose size equals the total number of output elements.
+  constexpr int MaxDim = 10;
+  int64_t logical_shape[MaxDim];
+  int64_t physical_shape[MaxDim];
+  int64_t size = 0, output_size = 1;
+  // Align the input shape to the output shape by filling ones into the outermost dimensions.
+  ShapeVector input_shape(output_shape.size());
+  for (size_t i = 0, j = output_shape.size() - unaligned_input_shape.size(); i < output_shape.size(); i++) {
+    input_shape[i] = i < j ? 1 : unaligned_input_shape[i - j];
+  }
+  // Get the logical and physical shapes of the input, merging adjacent dimensions that share the
+  // same (logical or physical) property.
+  for (int i = SizeToInt(output_shape.size()) - 1; i >= 0;) {
+    int64_t stride = 1;
+    bool change = false, is_valid = false;
+    while (i >= 0 && input_shape[i] == output_shape[i]) {
+      stride *= output_shape[i];
+      change = is_valid = true;
+      --i;
+    }
+    if (change) {
+      output_size *= stride;
+      logical_shape[size] = physical_shape[size] = stride;
+      size++;
+    }
+    change = false;
+    stride = 1;
+    while (i >= 0 && input_shape[i] == 1) {
+      stride *= output_shape[i];
+      change = is_valid = true;
+      --i;
+    }
+    if (change) {
+      output_size *= stride;
+      logical_shape[size] = 1;
+      physical_shape[size] = stride;
+      size++;
+    }
+    if (!is_valid) {
+      MS_LOG(EXCEPTION) << "The shapes cannot be broadcast: input shape is " << unaligned_input_shape
+                        << " and output shape is " << output_shape;
+    }
+  }
+  // Compute the flattened input indices according to "logical_shape" and "physical_shape".
+  int64_t offset = 1;
+  int64_t stride = 1;
+  index_list->resize(output_size);
+  (*index_list)[0] = 0;  // The first element is always 0.
+  for (int64_t i = 0; i < size; ++i) {
+    int64_t increment = (logical_shape[i] == physical_shape[i] ? stride : 0);
+    for (int64_t j = 0; j < (physical_shape[i] - 1) * offset; ++j) {
+      (*index_list)[offset + j] = (*index_list)[j] + increment;
+    }
+    offset *= physical_shape[i];
+    stride *= logical_shape[i];
+  }
+}
+
 int XdivyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                               const std::vector<KernelTensorPtr> &outputs,
                               const std::map<uint32_t, tensor::TensorPtr> &) {
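GetBroadCastIndex builds, once per shape, a table mapping every flat output index to the flat input index it should read; merging adjacent dimensions with the same broadcast behavior lets the table be filled by block-wise copies, where each block equals the previous block plus a constant increment. The following Python sketch computes the same mapping the slow, obvious way (broadcast_index_list is a hypothetical name introduced here for illustration):

```python
import math

def broadcast_index_list(input_shape, output_shape):
    """For each flat output index, return the flat input index that
    broadcasting reads (size-1 axes are clamped to coordinate 0)."""
    ndim = len(output_shape)
    # Left-pad the input shape with ones to align ranks, as the C++ does.
    padded = [1] * (ndim - len(input_shape)) + list(input_shape)
    table = []
    for out_flat in range(math.prod(output_shape)):
        rem, in_flat, in_stride = out_flat, 0, 1
        for axis in range(ndim - 1, -1, -1):
            coord = rem % output_shape[axis]
            rem //= output_shape[axis]
            if padded[axis] != 1:  # broadcast axes contribute index 0
                in_flat += coord * in_stride
            in_stride *= padded[axis]
        table.append(in_flat)
    return table

# Shape (1, 3) broadcast to (2, 3): both output rows read input row 0.
print(broadcast_index_list([1, 3], [2, 3]))  # [0, 1, 2, 0, 1, 2]
```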
@@ -122,19 +203,13 @@ int XdivyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::v
     return ret;
   }
-  x_shape_ = inputs[0]->GetShapeVector();
-  y_shape_ = inputs[1]->GetShapeVector();
-  out_shape_ = outputs[0]->GetShapeVector();
-  if (out_shape_.empty()) {
-    out_shape_.emplace_back(1);
-  }
-  auto x_shape_len = x_shape_.size();
-  for (size_t i = 0; i < out_shape_.size() - x_shape_len; ++i) {
-    (void)x_shape_.insert(x_shape_.begin(), 1);
-  }
-  auto y_shape_len = y_shape_.size();
-  for (size_t i = 0; i < out_shape_.size() - y_shape_len; ++i) {
-    (void)y_shape_.insert(y_shape_.begin(), 1);
+  auto x_shape = inputs[0]->GetShapeVector();
+  auto y_shape = inputs[1]->GetShapeVector();
+  auto out_shape = outputs[0]->GetShapeVector();
+  is_need_broadcast_ = x_shape != y_shape;
+  if (is_need_broadcast_) {
+    GetBroadCastIndex(x_shape, out_shape, &index_listx_);
+    GetBroadCastIndex(y_shape, out_shape, &index_listy_);
   }
   return KRET_OK;
 }
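The performance gain comes from moving broadcast bookkeeping out of the hot loop: Resize rebuilds the two index tables only when the shapes change, and every subsequent launch is a flat loop over precomputed indices. A hedged Python sketch of that split, reusing broadcast_index_list from above (function names are hypothetical):

```python
def resize(x_shape, y_shape, out_shape):
    """Mirror of the C++ Resize: per-shape state is computed once."""
    if x_shape != y_shape:
        return (True,
                broadcast_index_list(x_shape, out_shape),
                broadcast_index_list(y_shape, out_shape))
    return (False, None, None)

def launch(x_flat, y_flat, state):
    """Mirror of LaunchKernel: the hot loop is plain table lookups."""
    need_broadcast, ix, iy = state
    if need_broadcast:
        pairs = ((x_flat[i], y_flat[j]) for i, j in zip(ix, iy))
    else:
        pairs = zip(x_flat, y_flat)
    # Zero handling as in the kernel; the sign of the infinity is elided.
    return [0.0 if a == 0 else (a / b if b != 0 else float("inf"))
            for a, b in pairs]
```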


@@ -48,9 +48,8 @@ class XdivyCpuKernelMod : public NativeCpuKernelMod {
     input_size_list_.clear();
     output_size_list_.clear();
     workspace_size_list_.clear();
-    x_shape_.clear();
-    y_shape_.clear();
-    out_shape_.clear();
+    index_listx_.clear();
+    index_listy_.clear();
   }

  private:
@@ -63,9 +62,10 @@ class XdivyCpuKernelMod : public NativeCpuKernelMod {
   static std::vector<KernelAttr> support_ops_;
   static std::map<mindspore::TypeId, XdivyFunc> func_map_;
   XdivyFunc kernel_func_;
-  ShapeVector x_shape_;
-  ShapeVector y_shape_;
-  ShapeVector out_shape_;
+  // Broadcast related.
+  std::vector<int64_t> index_listx_{};
+  std::vector<int64_t> index_listy_{};
+  bool is_need_broadcast_{false};
 };
 }  // namespace kernel
 }  // namespace mindspore


@@ -2131,7 +2131,7 @@ def xdivy(x, y):
     r"""
     Divides the first input tensor by the second input tensor element-wise. Returns zero when `x` is zero.
     """
-    return F.tensor_xdivy(x, y)
+    return F.xdivy(x, y)


 def int_bool(x):


@@ -146,7 +146,6 @@ from .math_func import (
     tensor_floordiv,
     floor_div,
     floordiv,
-    tensor_xdivy,
     xdivy,
     tensor_pow,
     pow,


@@ -80,7 +80,7 @@ tensor_mul = P.Mul()
 tensor_div = P.RealDiv()
 tensor_floordiv = P.FloorDiv()
 floordiv = tensor_floordiv
-tensor_xdivy = P.Xdivy()
+xdivy_ = P.Xdivy()
 tensor_pow = P.Pow()
 pows = tensor_pow
 tensor_mod = P.FloorMod()
@@ -4665,7 +4665,7 @@ def xdivy(x, y):
     >>> print(output)
     [ 1. 2. -0.5]
     """
-    return tensor_xdivy(x, y)
+    return xdivy_(x, y)


 def log10(x):
@@ -4972,7 +4972,6 @@ __all__ = [
     'tensor_floordiv',
     'floor_div',
     'floordiv',
-    'tensor_xdivy',
     'xdivy',
     'tensor_pow',
     'pow',
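Behavior is unchanged by the rename: `tensor_xdivy` drops out of the public surface, `xdivy_` becomes the internal handle, and `xdivy` is the only exported name. A quick check against the docstring output shown above (the input values here are an assumption chosen to reproduce `[ 1. 2. -0.5]`):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.array([2, 4, -1]), mindspore.float32)
y = Tensor(np.array([2, 2, 2]), mindspore.float32)
output = ops.xdivy(x, y)
print(output)  # [ 1.  2. -0.5]
```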


@@ -3285,8 +3285,8 @@ class Xdivy(Primitive):
     # Let x/y use the same sig_dtype to enable implicit conversion for compatibility.
     __mindspore_signature__ = (
-        sig.make_sig('x', dtype=sig.sig_dtype.T),
-        sig.make_sig('y', dtype=sig.sig_dtype.T)
+        sig.make_sig('x', rw=sig.sig_rw.RW_READ, dtype=sig.sig_dtype.T),
+        sig.make_sig('y', rw=sig.sig_rw.RW_READ, dtype=sig.sig_dtype.T)
     )

     @prim_attr_register
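With both arguments declared `rw=sig.sig_rw.RW_READ` and sharing `sig_dtype.T`, mixed-type calls are implicitly converted to a common dtype before dispatch; per the documentation above, a Parameter input that would need such a conversion raises RuntimeError instead. A small illustrative sketch (the values are assumptions):

```python
import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.array([2.0, 4.0]), mindspore.float32)
# The Python scalar 2.0 is implicitly converted to x's dtype (float32)
# because both signature slots share sig_dtype.T.
print(ops.xdivy(x, 2.0))  # expected: [1. 2.]
```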