forked from mindspore-Ecosystem/mindspore
!49561 xlogy performance optimize
Merge pull request !49561 from zhangzhaoju/master_xlogy
This commit is contained in:
commit d22f57ae7a
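Summary of the change: Xlogy is moved off the generic Arithmetic CPU kernel (its name constant, function-table entries, kernel attributes, and factory registration are deleted there) and into a dedicated XlogyCpuKernelMod whose same-shape path is vectorized with Eigen. The Xdivy kernel's element-wise loops are rewritten with the same Eigen Map pattern.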
@@ -48,6 +48,7 @@
 "mindspore/mindspore/ccsrc/pipeline/pynative/grad/bprop_expander/grad_ops/grad_quant_ops.cc" "internalAstError"
 "mindspore/mindspore/ccsrc/pipeline/pynative/grad/bprop_expander/grad_ops/grad_scipy_ops.cc" "internalAstError"
 "mindspore/mindspore/ccsrc/pipeline/pynative/grad/bprop_expander/grad_ops/grad_sparse_ops.cc" "internalAstError"
+"mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/xlogy_cpu_kernel.cc" "unreadVariable"
 
 # MindData
 "mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"
@@ -114,4 +115,4 @@
 "mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/" "identicalConditionAfterEarlyExit"
 "mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/" "uninitMemberVar"
 "mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/" "redundantInitialization"
-"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/" "redundantCondition"
\ No newline at end of file
+"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/" "redundantCondition"
@@ -50,7 +50,6 @@ constexpr auto kFloorDiv = "FloorDiv";
 constexpr auto kMod = "Mod";
 constexpr auto kFloorMod = "FloorMod";
 constexpr auto kSquaredDifference = "SquaredDifference";
-constexpr auto kXlogy = "Xlogy";
 constexpr auto kAtan2 = "Atan2";
 
 template <typename T>
@@ -218,8 +217,7 @@ class ArithmeticCpuTypeFunc : public CpuKernelFunc {
         {kFloorDiv, &ArithmeticCpuTypeFunc<T>::FloorDiv},
         {kAtan2, &ArithmeticCpuTypeFunc<T>::Atan2},
         {kRealDiv, &ArithmeticCpuTypeFunc<T>::RealDiv},
-        {kSquaredDifference, &ArithmeticCpuTypeFunc<T>::SquaredDifference},
-        {kXlogy, &ArithmeticCpuTypeFunc<T>::Xlogy}};
+        {kSquaredDifference, &ArithmeticCpuTypeFunc<T>::SquaredDifference}};
     } else {
       dtype_desc = "complex data";
       arithmeticMathFuncMap = {{kSquaredDifference, &ArithmeticCpuTypeFunc<T>::SquaredDifferenceComplex},
@@ -230,8 +228,7 @@ class ArithmeticCpuTypeFunc : public CpuKernelFunc {
         {kMul, &ArithmeticCpuTypeFunc<T>::Mul},
         {kDivNoNan, &ArithmeticCpuTypeFunc<T>::DivNoNan},
         {kAddV2, &ArithmeticCpuTypeFunc<T>::AddV2},
-        {kPow, &ArithmeticCpuTypeFunc<T>::PowComplex},
-        {kXlogy, &ArithmeticCpuTypeFunc<T>::Xlogy}};
+        {kPow, &ArithmeticCpuTypeFunc<T>::PowComplex}};
     }
     if (arithmeticMathFuncMap.find(kernel_name_) == arithmeticMathFuncMap.end()) {
       MS_LOG(EXCEPTION) << "For 'Arithmetic', it only supports operators in "
@@ -1221,23 +1218,6 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
        .AddInputAttr(kNumberTypeComplex128)
        .AddOutputAttr(kNumberTypeComplex128),
      SpecializeArithFunc<complex128>}}},
-  {kXlogy,
-   {{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-     SpecializeArithFunc<float>},
-    {KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
-     SpecializeArithFunc<double>},
-    {KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
-     SpecializeArithFunc<float16>},
-    {KernelAttr()
-       .AddInputAttr(kNumberTypeComplex64)
-       .AddInputAttr(kNumberTypeComplex64)
-       .AddOutputAttr(kNumberTypeComplex64),
-     SpecializeArithFunc<complex64>},
-    {KernelAttr()
-       .AddInputAttr(kNumberTypeComplex128)
-       .AddInputAttr(kNumberTypeComplex128)
-       .AddOutputAttr(kNumberTypeComplex128),
-     SpecializeArithFunc<complex128>}}},
  {kAtan2,
   {{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
     SpecializeArithFunc<float16>},
@@ -1344,8 +1324,6 @@ MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeCpuKernelMod, AssignSub,
 
 MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeCpuKernelMod, SquaredDifference,
                                  []() { return std::make_shared<ArithmeticCpuKernelMod>(kSquaredDifference); });
-MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeCpuKernelMod, Xlogy,
-                                 []() { return std::make_shared<ArithmeticCpuKernelMod>(kXlogy); });
 MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeCpuKernelMod, Atan2,
                                  []() { return std::make_shared<ArithmeticCpuKernelMod>(kAtan2); });
 MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeCpuKernelMod, AddV2,
@@ -45,52 +45,23 @@ complex64 GetDivZeroVal(const complex64 &) {
   return std::numeric_limits<complex64>::quiet_NaN();
 }
 
-template <class T>
-bool isZero(const T &val) {
-  return val == T(0.0f);
-}
-
-template <>
-bool isZero(const float &val) {
-  return std::fpclassify(val) == FP_ZERO;
-}
-
-template <>
-bool isZero(const double &val) {
-  return std::fpclassify(val) == FP_ZERO;
-}
-
 template <typename T>
-void SameShapeTask(T *x_addr, T *y_addr, T *output_addr, size_t start, size_t end) {
-  for (size_t i = start; i < end; i++) {
-    auto dividend = x_addr[i];
-    auto divisor = y_addr[i];
-    if (isZero(divisor)) {
-      if (isZero(dividend)) {
-        output_addr[i] = static_cast<T>(0.0);
-        continue;
-      }
-      output_addr[i] = GetDivZeroVal(dividend);
-      continue;
-    }
-    output_addr[i] = dividend / divisor;
-  }
+void XDivySameShapeTask(T *x_addr, T *y_addr, T *output_addr, size_t start, size_t end) {
+  Eigen::Map<Eigen::Array<T, -1, 1>> x_v(x_addr + start, end - start);
+  Eigen::Map<Eigen::Array<T, -1, 1>> y_v(y_addr + start, end - start);
+  Eigen::Map<Eigen::Array<T, -1, 1>> o_v(output_addr + start, end - start);
+  o_v = (x_v == T(0)).select(o_v, x_v / y_v);
 }
 
 template <>
-void SameShapeTask(float *x_addr, float *y_addr, float *output_addr, size_t start, size_t end) {
-  Eigen::Map<Eigen::ArrayXf> x_v(x_addr + start, end - start);
-  Eigen::Map<Eigen::ArrayXf> y_v(y_addr + start, end - start);
-  Eigen::Map<Eigen::ArrayXf> o_v(output_addr + start, end - start);
-  o_v = (x_v == 0).select(o_v, x_v / y_v);
-}
-
-template <>
-void SameShapeTask(double *x_addr, double *y_addr, double *output_addr, size_t start, size_t end) {
-  Eigen::Map<Eigen::ArrayXd> x_v(x_addr + start, end - start);
-  Eigen::Map<Eigen::ArrayXd> y_v(y_addr + start, end - start);
-  Eigen::Map<Eigen::ArrayXd> o_v(output_addr + start, end - start);
-  o_v = (x_v == 0).select(o_v, x_v / y_v);
+void XDivySameShapeTask(float16 *x_addr, float16 *y_addr, float16 *output_addr, size_t start, size_t end) {
+  Eigen::half *ex_addr = reinterpret_cast<Eigen::half *>(x_addr);
+  Eigen::half *ey_addr = reinterpret_cast<Eigen::half *>(y_addr);
+  Eigen::half *eo_addr = reinterpret_cast<Eigen::half *>(output_addr);
+  Eigen::Map<Eigen::Array<Eigen::half, -1, 1>> x_v(ex_addr + start, end - start);
+  Eigen::Map<Eigen::Array<Eigen::half, -1, 1>> y_v(ey_addr + start, end - start);
+  Eigen::Map<Eigen::Array<Eigen::half, -1, 1>> o_v(eo_addr + start, end - start);
+  o_v = (x_v == Eigen::half(0)).select(o_v, x_v / y_v);
 }
 
 template <typename T>
@@ -107,14 +78,12 @@ bool XdivyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inpu
   auto output_addr = static_cast<T *>(outputs[0]->addr);
   size_t output_size = outputs[0]->size / sizeof(T);
   auto sameShapeTask = [&x_addr, &y_addr, &output_addr](size_t start, size_t end) {
-    SameShapeTask(x_addr, y_addr, output_addr, start, end);
+    XDivySameShapeTask(x_addr, y_addr, output_addr, start, end);
   };
   auto diffShapeTask = [this, &x_addr, &y_addr, &output_addr](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      auto idxX = index_listx_[i];
-      auto idxY = index_listy_[i];
-      auto dividend = x_addr[idxX];
-      auto divisor = y_addr[idxY];
+      auto dividend = x_addr[index_listx_[i]];
+      auto divisor = y_addr[index_listy_[i]];
       auto zero = static_cast<T>(0);
       if (divisor == zero) {
         if (dividend == zero) {
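The Xdivy rewrite above is the heart of the optimization: instead of a scalar loop with per-element isZero branches, the raw buffers are mapped into Eigen arrays so the whole task becomes one vectorized select/divide expression that Eigen can compile to SIMD code. A minimal standalone sketch of that pattern (hypothetical names, assumes Eigen3 on the include path; not part of the commit):

#include <cstddef>
#include <iostream>
#include <Eigen/Dense>

// Same contract as XDivySameShapeTask above:
// out[i] = (x[i] == 0) ? out[i] : x[i] / y[i], for i in [start, end).
template <typename T>
void XdivySketch(T *x, T *y, T *out, std::size_t start, std::size_t end) {
  Eigen::Map<Eigen::Array<T, -1, 1>> x_v(x + start, end - start);
  Eigen::Map<Eigen::Array<T, -1, 1>> y_v(y + start, end - start);
  Eigen::Map<Eigen::Array<T, -1, 1>> o_v(out + start, end - start);
  // select() keeps the existing o_v value wherever x is zero, so 0/0 never
  // produces NaN here; those output slots must be pre-initialized.
  o_v = (x_v == T(0)).select(o_v, x_v / y_v);
}

int main() {
  float x[4] = {0.0f, 1.0f, 2.0f, 3.0f};
  float y[4] = {5.0f, 2.0f, 0.5f, 0.0f};
  float out[4] = {0.0f, 0.0f, 0.0f, 0.0f};  // pre-zeroed: x==0 slots stay 0
  XdivySketch(x, y, out, 0, 4);
  for (float v : out) std::cout << v << ' ';  // prints: 0 0.5 4 inf
  std::cout << '\n';
}

Note that the select is on the dividend, not the divisor: wherever x is zero the output keeps whatever value it already holds (xdivy defines the result as 0 when x is 0), and ordinary IEEE division handles the remaining y == 0 cases.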
@@ -0,0 +1,168 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "plugin/device/cpu/kernel/xlogy_cpu_kernel.h"
#include <algorithm>
#include <utility>
#include <limits>
#include <cmath>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "Eigen/Eigen"

namespace mindspore {
namespace kernel {
using complex64 = std::complex<float>;
using complex128 = std::complex<double>;
static constexpr size_t INPUT_NUM = 2;
static constexpr size_t OUTPUT_NUM = 1;
static constexpr int MAX_DIMS = 7;
static constexpr size_t PARALLEL_THRESHOLD = 4096;

template <typename T>
void XlogySameShapeTask(T *x_addr, T *y_addr, T *output_addr, size_t start, size_t end) {
  Eigen::Map<Eigen::Array<T, -1, 1>> x_v(x_addr + start, end - start);
  Eigen::Map<Eigen::Array<T, -1, 1>> y_v(y_addr + start, end - start);
  Eigen::Map<Eigen::Array<T, -1, 1>> o_v(output_addr + start, end - start);
  o_v = x_v * y_v.log();
}

template <>
void XlogySameShapeTask(float16 *x_addr, float16 *y_addr, float16 *output_addr, size_t start, size_t end) {
  Eigen::half *ex_addr = reinterpret_cast<Eigen::half *>(x_addr);
  Eigen::half *ey_addr = reinterpret_cast<Eigen::half *>(y_addr);
  Eigen::half *eo_addr = reinterpret_cast<Eigen::half *>(output_addr);
  Eigen::Map<Eigen::Array<Eigen::half, -1, 1>> x_v(ex_addr + start, end - start);
  Eigen::Map<Eigen::Array<Eigen::half, -1, 1>> y_v(ey_addr + start, end - start);
  Eigen::Map<Eigen::Array<Eigen::half, -1, 1>> o_v(eo_addr + start, end - start);
  o_v = x_v * y_v.log();
}

template <typename T>
bool XlogyCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                     const std::vector<kernel::AddressPtr> &,
                                     const std::vector<kernel::AddressPtr> &outputs) {
  CHECK_KERNEL_INPUTS_NUM(inputs.size(), INPUT_NUM, kernel_name_);
  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), OUTPUT_NUM, kernel_name_);
  if (has_null_input_) {
    return true;
  }
  auto x_addr = static_cast<T *>(inputs[0]->addr);
  auto y_addr = static_cast<T *>(inputs[1]->addr);
  auto output_addr = static_cast<T *>(outputs[0]->addr);
  size_t output_size = outputs[0]->size / sizeof(T);
  auto sameShapeTask = [&x_addr, &y_addr, &output_addr](size_t start, size_t end) {
    XlogySameShapeTask(x_addr, y_addr, output_addr, start, end);
  };
  auto diffShapeTask = [this, &x_addr, &y_addr, &output_addr](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      auto x1 = x_addr[index_listx_[i]];
      auto x2 = y_addr[index_listy_[i]];
      auto logx2 = log(x2);
      output_addr[i] = x1 * logx2;
    }
  };

  CTask task = is_need_broadcast_ ? CTask(diffShapeTask) : CTask(sameShapeTask);
  if (output_size < PARALLEL_THRESHOLD) {
    task(0, output_size);
  } else {
    ParallelLaunch(task, output_size, PARALLEL_THRESHOLD, this, pool_);
  }
  return true;
}

bool XlogyCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                               const std::vector<AddressPtr> &outputs) {
  return kernel_func_(this, inputs, workspace, outputs);
}

bool XlogyCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                             const std::vector<KernelTensorPtr> &outputs) {
  kernel_name_ = base_operator->name();
  CHECK_KERNEL_INPUTS_NUM(inputs.size(), INPUT_NUM, kernel_name_);
  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), OUTPUT_NUM, kernel_name_);
  auto x_type = inputs[0]->GetDtype();
  auto y_type = inputs[1]->GetDtype();
  auto out_type = outputs[0]->GetDtype();
  if (!(x_type == y_type && x_type == out_type)) {
    MS_LOG(ERROR) << "Xlogy needs the same input and output data type, but got X type:" << x_type
                  << " Y type:" << y_type << " out type:" << out_type;
    return false;
  }
  if (!MatchKernelFunc(base_operator, inputs, outputs)) {
    return false;
  }
  return true;
}

int XlogyCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                              const std::vector<KernelTensorPtr> &outputs,
                              const std::map<uint32_t, tensor::TensorPtr> &) {
  CHECK_KERNEL_INPUTS_NUM(inputs.size(), INPUT_NUM, kernel_name_);
  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), OUTPUT_NUM, kernel_name_);
  ResetResource();
  if (int ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
    return ret;
  }

  auto x_shape = LongVecToSizeVec(inputs.at(kIndex0)->GetShapeVector());
  auto y_shape = LongVecToSizeVec(inputs.at(kIndex1)->GetShapeVector());

  // When either input has a null (zero-sized) shape, the xlogy result is null too.
  has_null_input_ = CheckNullInput(x_shape);
  has_null_input_ = has_null_input_ || CheckNullInput(y_shape);
  if (has_null_input_) {
    return 0;
  }

  auto out_shape = LongVecToSizeVec(outputs.at(kIndex0)->GetShapeVector());
  if (out_shape.size() > MAX_DIMS || out_shape.size() < x_shape.size() || out_shape.size() < y_shape.size()) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be greater than " << MAX_DIMS
                      << ", and the output dimension cannot be less than the input dimension; but got x_shape "
                      << "dimension:" << x_shape.size() << ", y_shape dimension:" << y_shape.size()
                      << ", out_shape dimension:" << out_shape.size();
  }
  is_need_broadcast_ = x_shape != y_shape;
  if (is_need_broadcast_) {
    GetBroadCastIndex(x_shape, out_shape, &index_listx_);
    GetBroadCastIndex(y_shape, out_shape, &index_listy_);
  }
  return 0;
}

const std::vector<std::pair<KernelAttr, XlogyCpuKernelMod::KernelRunFunc>> &XlogyCpuKernelMod::GetFuncList() const {
  static const std::vector<std::pair<KernelAttr, XlogyCpuKernelMod::KernelRunFunc>> func_list = {
    {KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
     &XlogyCpuKernelMod::LaunchKernel<float16>},
    {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
     &XlogyCpuKernelMod::LaunchKernel<float>},
    {KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
     &XlogyCpuKernelMod::LaunchKernel<double>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeComplex64)
       .AddInputAttr(kNumberTypeComplex64)
       .AddOutputAttr(kNumberTypeComplex64),
     &XlogyCpuKernelMod::LaunchKernel<complex64>},
    {KernelAttr()
       .AddInputAttr(kNumberTypeComplex128)
       .AddInputAttr(kNumberTypeComplex128)
       .AddOutputAttr(kNumberTypeComplex128),
     &XlogyCpuKernelMod::LaunchKernel<complex128>},
  };
  return func_list;
}

MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, Xlogy, XlogyCpuKernelMod);
}  // namespace kernel
}  // namespace mindspore
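The new kernel dispatches between two tasks: XlogySameShapeTask, a single Eigen expression over the mapped buffers, and diffShapeTask, a scalar loop that gathers through the precomputed broadcast index lists. A self-contained sketch of both paths (illustrative names only, assumes Eigen3; not the MindSpore API):

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>
#include <Eigen/Dense>

// Same-shape path: out[i] = x[i] * log(y[i]) as one vectorized expression.
void XlogySameShape(const float *x, const float *y, float *out, std::size_t n) {
  Eigen::Map<const Eigen::ArrayXf> x_v(x, n), y_v(y, n);
  Eigen::Map<Eigen::ArrayXf> o_v(out, n);
  o_v = x_v * y_v.log();
}

// Broadcast path: index_x/index_y play the role of index_listx_/index_listy_,
// mapping each flat output position back to its (broadcast) input positions.
void XlogyBroadcast(const float *x, const float *y, float *out,
                    const std::vector<std::size_t> &index_x,
                    const std::vector<std::size_t> &index_y) {
  for (std::size_t i = 0; i < index_x.size(); ++i) {
    out[i] = x[index_x[i]] * std::log(y[index_y[i]]);
  }
}

int main() {
  float x[3] = {1.0f, 2.0f, 3.0f};
  float y[3] = {1.0f, 2.718281828f, 4.0f};
  float out[3];
  XlogySameShape(x, y, out, 3);
  std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n';  // 0 2 ~4.159
  // Broadcasting a single y value across x: index_y is all zeros.
  std::vector<std::size_t> ix = {0, 1, 2}, iy = {0, 0, 0};
  XlogyBroadcast(x, y, out, ix, iy);
  std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n';  // all 0 (log 1)
}

In the kernel itself the chosen task runs serially below PARALLEL_THRESHOLD (4096 elements) and through ParallelLaunch above it, so small tensors avoid thread-pool overhead.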
@@ -0,0 +1,65 @@
/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_XLOGY_CPU_KERNEL_H
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_XLOGY_CPU_KERNEL_H
#include <complex>
#include <vector>
#include <map>
#include <utility>
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"

namespace mindspore {
namespace kernel {
class XlogyCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper<XlogyCpuKernelMod> {
 public:
  XlogyCpuKernelMod() { ResetResource(); }
  ~XlogyCpuKernelMod() override = default;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
            const std::vector<KernelTensorPtr> &outputs) override;

  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

  const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

  std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

 protected:
  void ResetResource() noexcept {
    input_size_list_.clear();
    output_size_list_.clear();
    workspace_size_list_.clear();
    index_listx_.clear();
    index_listy_.clear();
  }

 private:
  template <typename T>
  bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
                    const std::vector<kernel::AddressPtr> &outputs);
  std::vector<size_t> index_listx_{};
  std::vector<size_t> index_listy_{};
  bool is_need_broadcast_{false};
  bool has_null_input_{false};
};
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_XLOGY_CPU_KERNEL_H