!38400 Add chinese desc and fix implicit problem && Optimize perf
Merge pull request !38400 from zhangzhaoju/ms_master
This commit is contained in:
commit
aac6a587f4
|
@ -1968,3 +1968,28 @@ mindspore.Tensor
|
|||
- **TypeError** - 如果 `x` 和 `y` 不是数值型、bool或Tensor。
|
||||
- **TypeError** - 如果 `x` 和 `y` 的数据类型不是float16、float32或float64。
|
||||
- **ValueError** - 如果 `x` 不能广播到与 `y` 的shape一致。
|
||||
|
||||
.. py:method:: xdivy(y)
|
||||
|
||||
计算原Tensor除以输入的Tensor。当原Tensor为零时,则返回零。原Tensor的数据类型需要是float,complex或bool。
|
||||
后面为了使表达清晰,使用 `x` 代替原Tensor。
|
||||
|
||||
.. math::
|
||||
out_i = x_{i} / y_{i}
|
||||
|
||||
`x` 和 `y` 的输入遵循隐式类型转换规则,使数据类型一致。y是一个Tensor或Scalar。当y是Tensor时,x和y的数据类型不能同时是bool的,它们的shape可以广播。当y是Scalar时,只能是一个常量。
|
||||
|
||||
**参数:**
|
||||
|
||||
- **y** (Union[Tensor, number.Number, bool]) - float、complex或bool类型的Tensor。`x` 和 `y` 不能同时为bool类型。
|
||||
|
||||
**返回:**
|
||||
|
||||
Tensor,shape与广播后的shape相同,数据类型为两个输入中精度较高或数值较高的类型。
|
||||
|
||||
**异常:**
|
||||
|
||||
- **TypeError** - 如果 `y` 不是以下之一:Tensor、Number、bool。
|
||||
- **TypeError** - 如果 `x` 和 `y` 的数据类型不是float16、float32、float64、complex64、complex128、bool。
|
||||
- **ValueError** - 如果 `x` 不能广播至 `y` 的shape。
|
||||
- **RuntimeError** - 如果Parameter的 `x` 、 `y` 需要进行数据类型转换,但是Parameter不支持数据类型转换。
|
||||
|
|
|
@ -53,14 +53,10 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
|
|||
auto y_addr = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
size_t output_size = outputs[0]->size / sizeof(T);
|
||||
BroadcastIterator base_iter(x_shape_, y_shape_, out_shape_);
|
||||
auto task = [&x_addr, &y_addr, &output_addr, &base_iter](size_t start, size_t end) {
|
||||
auto iter = base_iter;
|
||||
iter.SetPos(start);
|
||||
auto sameShapeTask = [&x_addr, &y_addr, &output_addr](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
auto dividend = x_addr[iter.GetInputPosA()];
|
||||
auto divisor = y_addr[iter.GetInputPosB()];
|
||||
iter.GenNextPos();
|
||||
auto dividend = x_addr[i];
|
||||
auto divisor = y_addr[i];
|
||||
auto zero = (T)0;
|
||||
if (divisor == zero) {
|
||||
if (dividend == zero) {
|
||||
|
@ -73,7 +69,30 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
|
|||
output_addr[i] = dividend / divisor;
|
||||
}
|
||||
};
|
||||
ParallelLaunchAutoSearch(task, output_size, this, ¶llel_search_info_);
|
||||
auto diffShapeTask = [this, &x_addr, &y_addr, &output_addr](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
auto idxX = index_listx_[i];
|
||||
auto idxY = index_listy_[i];
|
||||
auto dividend = x_addr[idxX];
|
||||
auto divisor = y_addr[idxY];
|
||||
auto zero = (T)0;
|
||||
if (divisor == zero) {
|
||||
if (dividend == zero) {
|
||||
output_addr[i] = zero;
|
||||
continue;
|
||||
}
|
||||
output_addr[i] = GetDivZeroVal(dividend);
|
||||
continue;
|
||||
}
|
||||
output_addr[i] = dividend / divisor;
|
||||
}
|
||||
};
|
||||
|
||||
if (is_need_broadcast_) {
|
||||
ParallelLaunch(diffShapeTask, output_size, 0, this, pool_);
|
||||
} else {
|
||||
ParallelLaunch(sameShapeTask, output_size, 0, this, pool_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -111,6 +130,68 @@ bool XdivyCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::ve
|
|||
return true;
|
||||
}
|
||||
|
||||
void GetBroadCastIndex(const ShapeVector &unaligned_input_shape, const ShapeVector &output_shape,
|
||||
std::vector<int64_t> *index_list) {
|
||||
// Given unaligned input shape and output shape, this function returns the mapping
|
||||
// from indices of output (logical) to corespondingly real input indices (physical).
|
||||
// The return will write to index_list, whose size is equal to total elements of output.
|
||||
constexpr int MaxDim = 10;
|
||||
int64_t logical_shape[MaxDim];
|
||||
int64_t physical_shape[MaxDim];
|
||||
int64_t size = 0, output_size = 1;
|
||||
// Align input shape to output shape by filling one into the outermost dimension.
|
||||
ShapeVector input_shape(output_shape.size());
|
||||
for (size_t i = 0, j = output_shape.size() - unaligned_input_shape.size(); i < output_shape.size(); i++) {
|
||||
input_shape[i] = i < j ? 1 : unaligned_input_shape[i - j];
|
||||
}
|
||||
// Get logical shape and physical shape of input. Moreover, we will merge the dimensions with same
|
||||
// (logical or physical) property.
|
||||
for (int i = SizeToInt(output_shape.size()) - 1; i >= 0;) {
|
||||
int64_t stride = 1;
|
||||
bool change = false, is_valid = false;
|
||||
while (i >= 0 && input_shape[i] == output_shape[i]) {
|
||||
stride *= output_shape[i];
|
||||
change = is_valid = true;
|
||||
--i;
|
||||
}
|
||||
if (change) {
|
||||
output_size *= stride;
|
||||
logical_shape[size] = physical_shape[size] = stride;
|
||||
size++;
|
||||
}
|
||||
change = false;
|
||||
stride = 1;
|
||||
while (i >= 0 && input_shape[i] == 1) {
|
||||
stride *= output_shape[i];
|
||||
change = is_valid = true;
|
||||
--i;
|
||||
}
|
||||
if (change) {
|
||||
output_size *= stride;
|
||||
logical_shape[size] = 1;
|
||||
physical_shape[size] = stride;
|
||||
size++;
|
||||
}
|
||||
if (!is_valid) {
|
||||
MS_LOG(EXCEPTION) << "Both shape are not able to broadcast, input shape is " << unaligned_input_shape
|
||||
<< " and output shape is " << output_shape;
|
||||
}
|
||||
}
|
||||
// Get the flatten input indices according to "logical_shape" and "physical_shape".
|
||||
int64_t offset = 1;
|
||||
int64_t stride = 1;
|
||||
index_list->resize(output_size);
|
||||
(*index_list)[0] = 0; // First element is set to 0.
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
int64_t increment = (logical_shape[i] == physical_shape[i] ? stride : 0);
|
||||
for (int64_t j = 0; j < (physical_shape[i] - 1) * offset; ++j) {
|
||||
(*index_list)[offset + j] = (*index_list)[j] + increment;
|
||||
}
|
||||
offset *= physical_shape[i];
|
||||
stride *= logical_shape[i];
|
||||
}
|
||||
}
|
||||
|
||||
int XdivyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &) {
|
||||
|
@ -122,19 +203,13 @@ int XdivyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::v
|
|||
return ret;
|
||||
}
|
||||
|
||||
x_shape_ = inputs[0]->GetShapeVector();
|
||||
y_shape_ = inputs[1]->GetShapeVector();
|
||||
out_shape_ = outputs[0]->GetShapeVector();
|
||||
if (out_shape_.empty()) {
|
||||
out_shape_.emplace_back(1);
|
||||
}
|
||||
auto x_shape_len = x_shape_.size();
|
||||
for (size_t i = 0; i < out_shape_.size() - x_shape_len; ++i) {
|
||||
(void)x_shape_.insert(x_shape_.begin(), 1);
|
||||
}
|
||||
auto y_shape_len = y_shape_.size();
|
||||
for (size_t i = 0; i < out_shape_.size() - y_shape_len; ++i) {
|
||||
(void)y_shape_.insert(y_shape_.begin(), 1);
|
||||
auto x_shape = inputs[0]->GetShapeVector();
|
||||
auto y_shape = inputs[1]->GetShapeVector();
|
||||
auto out_shape = outputs[0]->GetShapeVector();
|
||||
is_need_broadcast_ = x_shape != y_shape;
|
||||
if (is_need_broadcast_) {
|
||||
GetBroadCastIndex(x_shape, out_shape, &index_listx_);
|
||||
GetBroadCastIndex(y_shape, out_shape, &index_listy_);
|
||||
}
|
||||
return KRET_OK;
|
||||
}
|
||||
|
|
|
@ -48,9 +48,8 @@ class XdivyCpuKernelMod : public NativeCpuKernelMod {
|
|||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
x_shape_.clear();
|
||||
y_shape_.clear();
|
||||
out_shape_.clear();
|
||||
index_listx_.clear();
|
||||
index_listy_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -63,9 +62,10 @@ class XdivyCpuKernelMod : public NativeCpuKernelMod {
|
|||
static std::vector<KernelAttr> support_ops_;
|
||||
static std::map<mindspore::TypeId, XdivyFunc> func_map_;
|
||||
XdivyFunc kernel_func_;
|
||||
ShapeVector x_shape_;
|
||||
ShapeVector y_shape_;
|
||||
ShapeVector out_shape_;
|
||||
// Broadcast related.
|
||||
std::vector<int64_t> index_listx_{};
|
||||
std::vector<int64_t> index_listy_{};
|
||||
bool is_need_broadcast_{false};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -2131,7 +2131,7 @@ def xdivy(x, y):
|
|||
r"""
|
||||
Divides the first input tensor by the second input tensor element-wise. Returns zero when `x` is zero.
|
||||
"""
|
||||
return F.tensor_xdivy(x, y)
|
||||
return F.xdivy(x, y)
|
||||
|
||||
|
||||
def int_bool(x):
|
||||
|
|
|
@ -146,7 +146,6 @@ from .math_func import (
|
|||
tensor_floordiv,
|
||||
floor_div,
|
||||
floordiv,
|
||||
tensor_xdivy,
|
||||
xdivy,
|
||||
tensor_pow,
|
||||
pow,
|
||||
|
|
|
@ -80,7 +80,7 @@ tensor_mul = P.Mul()
|
|||
tensor_div = P.RealDiv()
|
||||
tensor_floordiv = P.FloorDiv()
|
||||
floordiv = tensor_floordiv
|
||||
tensor_xdivy = P.Xdivy()
|
||||
xdivy_ = P.Xdivy()
|
||||
tensor_pow = P.Pow()
|
||||
pows = tensor_pow
|
||||
tensor_mod = P.FloorMod()
|
||||
|
@ -4665,7 +4665,7 @@ def xdivy(x, y):
|
|||
>>> print(output)
|
||||
[ 1. 2. -0.5]
|
||||
"""
|
||||
return tensor_xdivy(x, y)
|
||||
return xdivy_(x, y)
|
||||
|
||||
|
||||
def log10(x):
|
||||
|
@ -4972,7 +4972,6 @@ __all__ = [
|
|||
'tensor_floordiv',
|
||||
'floor_div',
|
||||
'floordiv',
|
||||
'tensor_xdivy',
|
||||
'xdivy',
|
||||
'tensor_pow',
|
||||
'pow',
|
||||
|
|
|
@ -3285,8 +3285,8 @@ class Xdivy(Primitive):
|
|||
|
||||
# Let x/y using same sig_dtype to enable implicit conversion for compatibility
|
||||
__mindspore_signature__ = (
|
||||
sig.make_sig('x', dtype=sig.sig_dtype.T),
|
||||
sig.make_sig('y', dtype=sig.sig_dtype.T)
|
||||
sig.make_sig('x', rw=sig.sig_rw.RW_READ, dtype=sig.sig_dtype.T),
|
||||
sig.make_sig('y', rw=sig.sig_rw.RW_READ, dtype=sig.sig_dtype.T)
|
||||
)
|
||||
|
||||
@prim_attr_register
|
||||
|
|
Loading…
Reference in New Issue