forked from mindspore-Ecosystem/mindspore
!22236 clean code in cpu operater
Merge pull request !22236 from liangxhao/master_cleancode
This commit is contained in:
commit e06e83607e
@@ -406,7 +406,7 @@ void ArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   input_shape2_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
   if (output_shape_.size() == 0) {
-    output_shape_.insert(output_shape_.begin(), 1);
+    (void)output_shape_.insert(output_shape_.begin(), 1);
   }

   output_size_ = 1;
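Note: most of the changes in this merge follow the same clean-code pattern — the return value of calls such as std::vector::insert, std::copy, and emplace_back is explicitly discarded with a (void) cast so static-analysis checkers stop flagging an ignored return value. A minimal standalone sketch of the pattern (illustrative only, not the MindSpore code):

#include <vector>

void PadShapeToRank(std::vector<size_t> *shape, size_t rank) {
  // Prepend 1s until the shape reaches the requested rank. The iterator
  // returned by insert() is intentionally unused; the (void) cast makes
  // that explicit for lint tools.
  while (shape->size() < rank) {
    (void)shape->insert(shape->begin(), 1);
  }
}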
@@ -426,11 +426,11 @@ void ArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

   size_t l = input_shape1_.size();
   for (size_t i = 0; i < output_shape_.size() - l; ++i) {
-    input_shape1_.insert(input_shape1_.begin(), 1);
+    (void)input_shape1_.insert(input_shape1_.begin(), 1);
   }
   l = input_shape2_.size();
   for (size_t i = 0; i < output_shape_.size() - l; ++i) {
-    input_shape2_.insert(input_shape2_.begin(), 1);
+    (void)input_shape2_.insert(input_shape2_.begin(), 1);
   }
   CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_);
   CPUKernelUtils::GetElementNumEveryDim(input_shape2_, &input_element_num2_);
@@ -443,7 +443,8 @@ void ArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
 }

 template <typename T>
-bool ArithmeticCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+bool ArithmeticCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
+                                    const std::vector<AddressPtr> & /* workspace */,
                                     const std::vector<AddressPtr> &outputs) {
   T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
   T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
@@ -58,7 +58,7 @@ class ArithmeticCPUKernel : public CPUKernel {
   std::vector<size_t> input_element_num2_;
   std::vector<size_t> output_shape_;
   std::vector<size_t> output_element_num_;
-  size_t output_size_;
+  size_t output_size_{1};
   ArithmeticParameter op_para;
   OperateType operate_type_{ADD};
   TypeId dtype_{kTypeUnknown};
@@ -177,7 +177,7 @@ void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   input_shape2_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
   if (output_shape_.size() == 0) {
-    output_shape_.insert(output_shape_.begin(), 1);
+    (void)output_shape_.insert(output_shape_.begin(), 1);
   }

   output_size_ = 1;
@@ -187,11 +187,11 @@ void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

   size_t l = input_shape1_.size();
   for (size_t i = 0; i < output_shape_.size() - l; ++i) {
-    input_shape1_.insert(input_shape1_.begin(), 1);
+    (void)input_shape1_.insert(input_shape1_.begin(), 1);
   }
   l = input_shape2_.size();
   for (size_t i = 0; i < output_shape_.size() - l; ++i) {
-    input_shape2_.insert(input_shape2_.begin(), 1);
+    (void)input_shape2_.insert(input_shape2_.begin(), 1);
   }
   CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_);
   CPUKernelUtils::GetElementNumEveryDim(input_shape2_, &input_element_num2_);
@@ -205,7 +205,7 @@ void ArithmeticLogicCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

 template <typename T>
 bool ArithmeticLogicCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
-                                         const std::vector<AddressPtr> &workspace,
+                                         const std::vector<AddressPtr> & /* workspace */,
                                          const std::vector<AddressPtr> &outputs) {
   T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
   T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
@@ -52,7 +52,7 @@ class ArithmeticLogicCPUKernel : public CPUKernel {
   std::vector<size_t> input_element_num2_;
   std::vector<size_t> output_shape_;
   std::vector<size_t> output_element_num_;
-  size_t output_size_;
+  size_t output_size_{1};
   OperateType operate_type_{ADD};
   TypeId dtype_{kTypeUnknown};
   TypeId target_dtype_{kTypeUnknown};
@@ -264,7 +264,7 @@ void Atanh(const T *in, T *out, size_t size) {

 template <typename T>
 void Identity(const T *in, T *out, size_t size) {
-  std::copy(in, in + size, out);
+  (void)std::copy(in, in + size, out);
 }
 }  // namespace

@@ -26,7 +26,6 @@ void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
   size_t input_shape_size = input_shape_.size();
   size_t output_shape_size = output_shape_.size();
-
   if (output_shape_size < input_shape_size) {
     MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_
                       << " to a smaller dimension shape " << output_shape_ << ".";
@@ -68,7 +67,7 @@ bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, cons

   const auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
-  int ret = NNACL_ERR;
+  int ret = static_cast<int>(NNACL_ERR);
   if constexpr (std::is_same_v<T, bool>) {
     ret = BroadcastTo(bool, input_addr, &shape_info_, output_addr);
   } else if constexpr (std::is_same_v<T, int>) {
@@ -86,6 +85,5 @@ bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, cons
                   << " execute failed.";
     return false;
   }
-
 }  // namespace kernel
 }  // namespace mindspore
@@ -370,6 +370,5 @@ std::vector<size_t> CPUKernelUtils::GetBroadcastShape(const std::vector<size_t>
   }
   return broadcast_shape;
 }
-
 }  // namespace kernel
 }  // namespace mindspore
@@ -28,16 +28,16 @@ void DepthToSpaceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);
   input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
-  block_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size");
+  block_size_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size"));
 }

 template <typename T>
 bool DepthToSpaceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                      const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                      const std::vector<kernel::AddressPtr> & /* workspace */,
                                       const std::vector<kernel::AddressPtr> &outputs) {
   auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
-  size_t size = IntToSize(inputs[0]->size / sizeof(T));
+  size_t size = inputs[0]->size / sizeof(T);
   std::vector<size_t> input_shape = input_shape_;
   std::vector<size_t> output_shape = output_shape_;
   size_t block_size = block_size_;
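Note: DepthToSpace and SpaceToDepth now wrap the int64_t "block_size" attribute in LongToSize instead of assigning it to a size_t member implicitly, and drop IntToSize where the operands are already unsigned. A rough sketch of what such a checked narrowing helper typically looks like (assumed implementation; the actual MindSpore utility may differ):

#include <cstddef>
#include <cstdint>
#include <stdexcept>

// Assumed helper: convert a signed 64-bit attribute value to size_t,
// rejecting negative values instead of silently wrapping around.
inline size_t LongToSize(int64_t value) {
  if (value < 0) {
    throw std::out_of_range("attribute value must be non-negative");
  }
  return static_cast<size_t>(value);
}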
@@ -37,7 +37,7 @@ class DepthToSpaceCPUKernel : public CPUKernel {
   void CheckParam(const CNodePtr &kernel_node);
   std::vector<size_t> input_shape_;
   std::vector<size_t> output_shape_;
-  size_t block_size_;
+  size_t block_size_{0};
 };

 MS_REG_CPU_KERNEL_T(
@@ -246,7 +246,7 @@ bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inpu
     {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel<T>::AsinhGrad},
     {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel<T>::AcoshGrad},
     {prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel<T>::SoftplusGrad}};
-  if (inputs.size() < 2 || outputs.size() != 1) {
+  if (inputs.size() < kInputMinNum || outputs.size() != kOutputNum) {
     MS_LOG(ERROR) << kernel_name_ << " requires at least 2 inputs and 1 output, but got " << inputs.size()
                   << " inputs and " << outputs.size() << " output.";
     return false;
@@ -24,6 +24,8 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kInputMinNum = 2;
+constexpr size_t kOutputNum = 1;
 template <typename T>
 class EltWiseGradCPUKernel : public CPUKernel {
  public:
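Note: the other recurring pattern is replacing magic numbers in input/output checks with named constexpr constants (kInputNum, kOutputNum, kGateNum, and so on) declared at namespace scope in the kernel header. A minimal standalone sketch of the idea (hypothetical check, not the actual MindSpore class):

#include <cstddef>
#include <stdexcept>
#include <vector>

constexpr size_t kInputNum = 2;   // named instead of a bare literal 2
constexpr size_t kOutputNum = 1;  // named instead of a bare literal 1

void CheckIoCount(const std::vector<int> &inputs, const std::vector<int> &outputs) {
  // The intent of the comparison is now visible at the call site.
  if (inputs.size() != kInputNum || outputs.size() != kOutputNum) {
    throw std::invalid_argument("unexpected number of inputs or outputs");
  }
}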
@@ -39,18 +39,19 @@ void L2NormalizeCPUKernel<T>::CalcDenominator(const T *input_addr, const size_t
   size_t stride = 1;
   std::vector<size_t> axes(input_shape_.size());
   int k = 0;
-  for (int i = 0; i < dims; ++i) {
-    if (i != axis_) {
+  size_t axis_size = IntToSize(axis_);
+  for (size_t i = 0; i < IntToSize(dims); ++i) {
+    if (i != axis_size) {
       axes[k] = i;
       ++k;
     } else {
       stride *= input_shape_[i];
     }
   }
-  axes[k] = axis_;
+  axes[k] = axis_size;

   std::vector<size_t> transpose_shape(input_shape_.size());
-  for (int i = 0; i < dims; ++i) {
+  for (size_t i = 0; i < IntToSize(dims); ++i) {
     transpose_shape[i] = input_shape_[axes[i]];
   }

@@ -109,7 +110,7 @@ void L2NormalizeCPUKernel<T>::CalcOutput(const T *input_addr, const std::vector<

 template <typename T>
 bool L2NormalizeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                     const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                     const std::vector<kernel::AddressPtr> & /* workspace */,
                                      const std::vector<kernel::AddressPtr> &outputs) {
   auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
@@ -46,7 +46,7 @@ class L2NormalizeCPUKernel : public CPUKernel {
   std::vector<size_t> input_shape_;
   std::vector<size_t> output_shape_;
   T epsilon_;
-  int axis_;
+  int axis_{0};
   void CheckParam(const CNodePtr &kernel_node);
 };

@@ -31,7 +31,7 @@ void L2NormalizeGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

   int output_dim_length = output_shape.size();
   dim_elem_num_list_.resize(output_dim_length, 1);
-  for (int i = output_dim_length - 2; i >= 0; i--) {
+  for (int i = output_dim_length - 2; i >= 0; i--) {  // from -2 to 0 dim
     dim_elem_num_list_[i] = output_shape[i + 1] * dim_elem_num_list_[i + 1];
   }

@@ -138,14 +138,15 @@ void L2NormalizeGradCPUKernel<T>::GetSumOfProduct(const std::vector<T> &x_vector
   for (size_t i = 0; i < len; i++) {
     tmp_vector[i] = x_vector[i] * y_vector[i];
   }
-  if (len % 2 == 1) {
+  const size_t half = 2;
+  if (len % half == 1) {
     tmp_vector[0] += tmp_vector[len - 1];
   }
-  for (size_t stride = len / 2; stride > 0; stride >>= 1) {
+  for (size_t stride = len / half; stride > 0; stride >>= 1) {
     for (size_t i = 0; i < stride; i++) {
       tmp_vector[i] += tmp_vector[i + stride];
     }
-    if (stride > 2 && stride % 2 == 1) {
+    if (stride > half && stride % half == 1) {
       tmp_vector[0] += tmp_vector[stride - 1];
     }
   }
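Note: GetSumOfProduct accumulates the element-wise products with a stride-halving (pairwise) reduction rather than a single running sum, which also tends to limit floating-point round-off growth; the change above only names the literal 2 as half. A self-contained sketch of the same reduction scheme (hypothetical helper, not the MindSpore implementation):

#include <cstddef>
#include <vector>

// Pairwise reduction of sum(x[i] * y[i]); assumes x and y have equal length.
float SumOfProduct(const std::vector<float> &x, const std::vector<float> &y) {
  const size_t len = x.size();
  const size_t half = 2;
  if (len < half) {
    return len == 0 ? 0.0f : x[0] * y[0];
  }
  std::vector<float> tmp(len);
  for (size_t i = 0; i < len; i++) {
    tmp[i] = x[i] * y[i];
  }
  if (len % half == 1) {  // fold the odd trailing element into slot 0 first
    tmp[0] += tmp[len - 1];
  }
  for (size_t stride = len / half; stride > 0; stride >>= 1) {
    for (size_t i = 0; i < stride; i++) {
      tmp[i] += tmp[i + stride];
    }
    if (stride > half && stride % half == 1) {  // fold the leftover odd slot
      tmp[0] += tmp[stride - 1];
    }
  }
  return tmp[0];
}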
@@ -23,12 +23,14 @@ namespace kernel {
 template <typename T>
 void MaskedSelectCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
-  if (input_num != 2) {
-    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectCPUKernel needs 2 input.";
+  if (input_num != kInputNum) {
+    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectCPUKernel needs " << kInputNum
+                      << " input.";
   }
   size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
-  if (output_num != 1) {
-    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectCPUKernel needs 1 output.";
+  if (output_num != kOutputNum) {
+    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectCPUKernel needs " << kOutputNum
+                      << " output.";
   }
   input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -69,7 +71,7 @@ bool MaskedSelectCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp
     MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
   }
   std::vector<size_t> out_shape;
-  out_shape.emplace_back(j);
+  (void)out_shape.emplace_back(j);
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
   std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
@@ -24,6 +24,8 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kInputNum = 2;
+constexpr size_t kOutputNum = 1;
 template <typename T>
 class MaskedSelectCPUKernel : public CPUKernel {
  public:
@@ -23,16 +23,18 @@ namespace kernel {
 template <typename T>
 void MaskedSelectGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
-  if (input_num != 3) {
-    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectGradCPUKernel needs 3 input.";
+  if (input_num != kInputNum) {
+    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but MaskedSelectGradCPUKernel needs " << kInputNum
+                      << " input.";
   }
   size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
-  if (output_num != 1) {
-    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectGradCPUKernel needs 1 output.";
+  if (output_num != kOutputNum) {
+    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but MaskedSelectGradCPUKernel needs " << kOutputNum
+                      << " output.";
   }
-  input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-  input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
-  grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
+  input_shape_a_ = AnfAlgo::GetInputDeviceShape(kernel_node, INPUT);
+  input_shape_b_ = AnfAlgo::GetInputDeviceShape(kernel_node, MASK);
+  grad_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, GRAD);
   output_shape_ = CPUKernelUtils::GetBroadcastShape(input_shape_a_, input_shape_b_);
   for (const uint64_t &d : output_shape_) {
     tensor_size_ *= d;
@@ -43,9 +45,9 @@ template <typename T>
 bool MaskedSelectGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                           const std::vector<kernel::AddressPtr> &,
                                           const std::vector<kernel::AddressPtr> &outputs) {
-  auto mask = reinterpret_cast<bool *>(inputs[1]->addr);
-  auto grad = reinterpret_cast<T *>(inputs[2]->addr);
-  auto dx = reinterpret_cast<T *>(outputs[0]->addr);
+  auto mask = reinterpret_cast<bool *>(inputs[MASK]->addr);
+  auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
+  auto dx = reinterpret_cast<T *>(outputs[INPUT]->addr);

   auto ret = memset_s(dx, outputs[0]->size, 0, outputs[0]->size);
   if (ret != EOK) {
@@ -24,6 +24,8 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kInputNum = 3;
+constexpr size_t kOutputNum = 1;
 template <typename T>
 class MaskedSelectGradCPUKernel : public CPUKernel {
  public:
@@ -41,6 +43,7 @@ class MaskedSelectGradCPUKernel : public CPUKernel {
   std::vector<size_t> grad_shape_;
   std::vector<size_t> output_shape_;
   uint64_t tensor_size_ = 1;
+  enum input_list_ { INPUT, MASK, GRAD };
 };

 MS_REG_CPU_KERNEL_T(MaskedSelectGrad,
@@ -35,7 +35,7 @@ void BatchNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   momentum = AnfAlgo::GetNodeAttr<float>(kernel_node, "momentum");
   std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   if (x_shape.size() == 2) {
-    x_shape.insert(x_shape.end(), 2, 1);
+    x_shape.insert(x_shape.end(), 2, 1);  // expand 2 dim: NC -> NCHW
   } else if (x_shape.size() != 4) {
     MS_LOG(EXCEPTION) << "Batchnorm only support nchw input!";
   }
@@ -23,6 +23,7 @@ namespace mindspore {
 namespace kernel {
 const int kMaxLSTMLayer = 100;
 const int kOutputWorkSpaceIndex = 3;
+const size_t kGateNum = 4;
 void LstmCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
   CPUKernel::InitInputOutputSize(kernel_node);
   output_size_list_[kOutputWorkSpaceIndex] = reserve_size_;
@@ -31,10 +32,10 @@ void LstmCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
   auto output_types = std::vector<TypeId>(output_num, output_type);
   std::vector<std::vector<size_t>> output_shapes;
   for (size_t output_index = 0; output_index < output_num; ++output_index) {
-    std::vector<size_t> shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index);
+    auto shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index);
     output_shapes.emplace_back(shape);
   }
-  size_t len = reserve_size_ / 4;
+  size_t len = reserve_size_ / kGateNum;
   output_shapes[kOutputWorkSpaceIndex] = {len, 1};
   AnfAlgo::SetOutputInferTypeAndShape(output_types, output_shapes, kernel_node.get());
 }
@@ -56,9 +57,9 @@ void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   dim src_dims = {seq_len_, batch_size_, input_size_};
   dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
   dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
-  weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
-  weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
-  bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_};
+  weights_dims_ = {num_layers_, num_directions_, input_size_, kGateNum, hidden_size_};
+  weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, kGateNum, hidden_size_};
+  bias_dims_ = {num_layers_, num_directions_, kGateNum, hidden_size_};
   dim dst_dims = {seq_len_, batch_size_, static_cast<int64_t>(hidden_size_) * num_directions_};
   dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
   dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
@@ -115,7 +116,7 @@ void LstmCPUKernel::CheckParam(const CNodePtr &kernel_node) {
   if (bidirectional_) {
     num_directions_ = 2;
   }
-  const int gate_size = 4 * hidden_size_;
+  const int gate_size = kGateNum * hidden_size_;
   if (num_layers_ <= 0) {
     MS_LOG(EXCEPTION) << "Layers must be greater than zero!";
   }
@@ -38,8 +38,8 @@ void PrintCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

 template <typename T>
 bool PrintCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                               const std::vector<kernel::AddressPtr> & /*workspace*/,
-                               const std::vector<kernel::AddressPtr> & /*outputs*/) {
+                               const std::vector<kernel::AddressPtr> & /* workspace */,
+                               const std::vector<kernel::AddressPtr> & /* outputs */) {
   auto data_type = CheckType();
   if (data_type == kTypeUnknown) {
     MS_LOG(EXCEPTION) << "CPU print does not support the input type.";
@@ -41,9 +41,9 @@ class ResizeBilinearCPUKernel : public CPUKernel {
  private:
   void CheckParam(const CNodePtr &kernel_node);
   TypeId dtype_{kTypeUnknown};
-  bool align_corners_ = false;
-  float height_scale;
-  float width_scale;
+  bool align_corners_{false};
+  float height_scale{1.0};
+  float width_scale{1.0};
   std::vector<int64_t> size_;
   std::vector<size_t> shape_;
 };
@@ -117,6 +117,5 @@ bool RMSPropCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   return true;
 }
-
 }  // namespace kernel
 }  // namespace mindspore
@@ -20,7 +20,6 @@

 namespace mindspore {
 namespace kernel {
-
 template <typename T>
 void ScatterArithmeticCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   CheckParam(kernel_node);
@@ -43,11 +42,11 @@ template <typename T>
 void ScatterArithmeticCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) const {
   MS_EXCEPTION_IF_NULL(kernel_node);
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
-  if (input_num != 3) {
+  if (input_num != kInputNum) {
     MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterAdd needs 3 inputs.";
   }
   size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
-  if (output_num != 1) {
+  if (output_num != kOutputNum) {
     MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterAdd has 1 output.";
   }
 }
@@ -65,9 +64,9 @@ bool ScatterArithmeticCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr>
     {"ScatterMin", &ScatterArithmeticCPUKernel<T>::ScatterMin},
     {"ScatterUpdate", &ScatterArithmeticCPUKernel<T>::ScatterUpdate}};
   if (kScatterArithmeticBinOpFuncMap.find(kernel_name_) != kScatterArithmeticBinOpFuncMap.end()) {
-    T *input = reinterpret_cast<T *>(inputs[0]->addr);
-    int *indices = reinterpret_cast<int *>(inputs[1]->addr);
-    T *updates = reinterpret_cast<T *>(inputs[2]->addr);
+    T *input = reinterpret_cast<T *>(inputs[INPUT]->addr);
+    int *indices = reinterpret_cast<int *>(inputs[INDICES]->addr);
+    T *updates = reinterpret_cast<T *>(inputs[UPDATES]->addr);
     T *output = reinterpret_cast<T *>(outputs[0]->addr);
     kScatterArithmeticBinOpFuncMap.at(kernel_name_)(this, input, indices, updates);
     auto bufferSize = outputs[0]->size;
@@ -22,6 +22,8 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kInputNum = 3;
+constexpr size_t kOutputNum = 1;
 template <typename T>
 class ScatterArithmeticCPUKernel : public CPUKernel {
  public:
@@ -55,6 +57,7 @@ class ScatterArithmeticCPUKernel : public CPUKernel {
   size_t inner_size_{0};
   size_t indices_size_{0};
   std::string kernel_name_;
+  enum input_list_ { INPUT, INDICES, UPDATES };
 };

 MS_REG_CPU_KERNEL_T(ScatterAdd,
@@ -51,12 +51,12 @@ bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::v
                              const std::vector<AddressPtr> &outputs) {
   CheckParam(inputs, outputs);

-  auto param = reinterpret_cast<T *>(inputs[0]->addr);
-  auto grad = reinterpret_cast<T *>(inputs[1]->addr);
-  auto lr = reinterpret_cast<T *>(inputs[2]->addr);
-  auto accum = reinterpret_cast<T *>(inputs[3]->addr);
-  auto momentum = reinterpret_cast<T *>(inputs[4]->addr);
-  auto stat = reinterpret_cast<T *>(inputs[5]->addr);
+  auto param = reinterpret_cast<T *>(inputs[PARAM]->addr);
+  auto grad = reinterpret_cast<T *>(inputs[GRAD]->addr);
+  auto lr = reinterpret_cast<T *>(inputs[LR]->addr);
+  auto accum = reinterpret_cast<T *>(inputs[ACCUM]->addr);
+  auto momentum = reinterpret_cast<T *>(inputs[MOMENTUM]->addr);
+  auto stat = reinterpret_cast<T *>(inputs[STAT]->addr);
   auto output_param = reinterpret_cast<T *>(outputs[0]->addr);
   size_t elem_num = inputs[0]->size / sizeof(T);

@@ -39,6 +39,7 @@ class SGDCPUKernel : public CPUKernel {
   float dampening_{0.0};
   float weight_decay_{0.0};
   bool nesterov_{true};
+  enum input_list_ { PARAM, GRAD, LR, ACCUM, MOMENTUM, STAT };
 };

 MS_REG_CPU_KERNEL_T(SGD,
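Note: kernels with several positional inputs (SGD, ScatterAdd, MaskedSelectGrad, SparseTensorDenseMatmul) now declare a small unscoped enum so that inputs[...] is indexed by name rather than by a bare integer. A short standalone sketch of the idea (hypothetical buffer type and accessor, not the real classes):

#include <cstddef>
#include <vector>

struct Address {  // hypothetical stand-in for the kernel AddressPtr payload
  void *addr;
  size_t size;
};

// Unscoped enum: the enumerators convert implicitly to the vector index,
// so inputs[PARAM] reads better than inputs[0] and survives reordering edits.
enum SgdInput { PARAM, GRAD, LR, ACCUM, MOMENTUM, STAT };

float *ParamOf(const std::vector<Address> &inputs) {
  return static_cast<float *>(inputs[PARAM].addr);
}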
@@ -29,16 +29,16 @@ void SpaceToDepthCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {

   input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
-  block_size_ = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size");
+  block_size_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "block_size"));
 }

 template <typename T>
 bool SpaceToDepthCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                      const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                      const std::vector<kernel::AddressPtr> & /* workspace */,
                                       const std::vector<kernel::AddressPtr> &outputs) {
   auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
-  size_t size = IntToSize(inputs[0]->size / sizeof(T));
+  size_t size = inputs[0]->size / sizeof(T);

   std::vector<size_t> input_shape = input_shape_;
   std::vector<size_t> output_shape = output_shape_;
@@ -36,7 +36,7 @@ class SpaceToDepthCPUKernel : public CPUKernel {
   void CheckParam(const CNodePtr &kernel_node);
   std::vector<size_t> input_shape_;
   std::vector<size_t> output_shape_;
-  size_t block_size_;
+  size_t block_size_{0};
 };

 MS_REG_CPU_KERNEL_T(
@@ -24,14 +24,14 @@ template <typename I, typename T>
 void SparseTensorDenseMatmulCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_node) {
   adj_st_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST);
   adj_dt_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT);
-  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  if (indices_shape.size() != 2 && indices_shape[1] != 2) {
+  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES);
+  if (indices_shape.size() != kIndicesSizeNum && indices_shape[1] != kIndices2rdDimNum) {
     MS_LOG(EXCEPTION)
       << "SparseTensorDenseMatmul requires 'indices' should be a 2-D Tensor and the second dimension length "
          "should be 2, but got 'indices' shape: "
       << indices_shape;
   }
-  auto values_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+  auto values_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
   if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
     MS_LOG(EXCEPTION)
       << "SparseTensorDenseMatmul requires 'value's should be a 1-D Tensor and the first dimension length should be "
@@ -40,14 +40,14 @@ void SparseTensorDenseMatmulCPUKernel<I, T>::InitKernel(const CNodePtr &kernel_n
   }
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
   values_size_ = values_shape[0];
-  b_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
+  b_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, DENSE);
 }

 template <typename I, typename T>
 bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                                    const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                                    const std::vector<kernel::AddressPtr> & /* workspace */,
                                                     const std::vector<kernel::AddressPtr> &outputs) {
-  if (inputs.size() != 4 || outputs.size() != 1) {
+  if (inputs.size() != kInputNum || outputs.size() != kOutputNum) {
     MS_LOG(ERROR) << "SparseTensorDenseMatmul requires 4 inputs and 1 output, but got " << inputs.size()
                   << " inputs and " << outputs.size() << " output.";
     return false;
@@ -74,7 +74,7 @@ bool SparseTensorDenseMatmulCPUKernel<I, T>::Launch(const std::vector<kernel::Ad
   const size_t same_dim = adj_dt_ ? b_dim_1 : b_dim_0;

   for (size_t i = 0; i < values_size_; ++i) {
-    if (i * 2 + 1 >= indices_length) {
+    if (i * 2 + 1 >= indices_length) {  // the interval is 2
       MS_LOG(EXCEPTION) << "The index of a_indices out of bounds.";
     }
     if (i >= values_length) {
@@ -23,6 +23,10 @@

 namespace mindspore {
 namespace kernel {
+constexpr size_t kInputNum = 4;
+constexpr size_t kOutputNum = 1;
+constexpr size_t kIndicesSizeNum = 2;
+constexpr size_t kIndices2rdDimNum = 2;
 template <typename I, typename T>
 class SparseTensorDenseMatmulCPUKernel : public CPUKernel {
  public:
@@ -41,6 +45,7 @@ class SparseTensorDenseMatmulCPUKernel : public CPUKernel {
   size_t values_size_{0};
   bool adj_st_{false};
   bool adj_dt_{false};
+  enum input_list_ { INDICES, VALUES, SPARSE_SHAPE, DENSE };
 };

 MS_REG_CPU_KERNEL_T_S(SparseTensorDenseMatmul,
@@ -33,7 +33,7 @@ void SplitCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
 template <typename T>
 void SplitCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
   CPUKernel::InitInputOutputSize(kernel_node);
-  workspace_size_list_.emplace_back((sizeof(T *) * output_num_));
+  (void)workspace_size_list_.emplace_back((sizeof(T *) * static_cast<size_t>(output_num_)));
 }

 template <typename T>
@@ -45,12 +45,12 @@ bool SplitCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
 }

 template <typename T>
-void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t size) {
+void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t /* size */) {
   SplitParameter param;
   param.num_split_ = output_num_;
   param.split_dim_ = axis_;
   param.strides_[input_shape_.size() - 1] = 1;
-  for (int i = input_shape_.size() - 2; i >= 0; i--) {
+  for (int i = input_shape_.size() - 2; i >= 0; i--) {  // from -2 to 0 dim
     param.strides_[i] = param.strides_[i + 1] * input_shape_[i + 1];
   }
   auto split_sizes = std::make_unique<int[]>(param.num_split_);
@@ -103,7 +103,7 @@ void SplitCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
   if (axis_ < 0) {
     axis_ += SizeToInt(input_shape_.size());
   }
-  if (output_num_ > SizeToInt(input_shape_[axis_])) {
+  if (output_num_ > IntToLong(input_shape_[axis_])) {
     MS_LOG(EXCEPTION) << "Attr output_num " << output_num_ << " must less than " << input_shape_[axis_];
   }
   if (output_num_ != output_num) {
@@ -23,8 +23,10 @@
 namespace mindspore {
 namespace kernel {
 void StridedSliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
-  // CheckParam(kernel_node);
   param_ = (struct StridedSliceParameter *)malloc(sizeof(struct StridedSliceParameter));
+  if (param_ == nullptr) {
+    MS_LOG(ERROR) << "malloc StridedSliceGradParameter failed.";
+  }
   output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
   switch (dtype_) {
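Note: the InitKernel change above pairs the raw malloc with an explicit null check, and the member is now default-initialized to nullptr in the header further down. A minimal sketch of the same allocate-and-check pattern in isolation (hypothetical parameter struct, not the NNACL one):

#include <cstdlib>
#include <iostream>

struct SliceParameter {  // hypothetical stand-in for the NNACL parameter struct
  int num_axes;
};

SliceParameter *AllocSliceParameter() {
  // malloc can return nullptr; report it instead of dereferencing blindly.
  auto *param = static_cast<SliceParameter *>(malloc(sizeof(SliceParameter)));
  if (param == nullptr) {
    std::cerr << "malloc SliceParameter failed." << std::endl;
  }
  return param;
}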
@@ -91,7 +93,7 @@ void StridedSliceGradCPUKernel::ExpandAllMemberDims() {
 }

 bool StridedSliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                       const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                       const std::vector<kernel::AddressPtr> & /* workspace */,
                                        const std::vector<kernel::AddressPtr> &outputs) {
   bool ret{true};
   if (dtype_ == kNumberTypeFloat32) {
@@ -43,7 +43,7 @@ class StridedSliceGradCPUKernel : public CPUKernel {
   std::vector<int> input_shape_;
   std::vector<size_t> output_shape_;
   TypeId dtype_{kTypeUnknown};
-  StridedSliceParameter *param_;
+  StridedSliceParameter *param_{nullptr};
 };

 MS_REG_CPU_KERNEL(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
@@ -46,7 +46,7 @@ void TensorCopySlicesCPUKernel::InitKernel(const CNodePtr &kernel_node) {
 }

 bool TensorCopySlicesCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
-                                       const std::vector<kernel::AddressPtr> & /*workspace*/,
+                                       const std::vector<kernel::AddressPtr> & /* workspace */,
                                        const std::vector<kernel::AddressPtr> &outputs) {
   if (inputs.size() != 2 || outputs.size() != 1) {
     MS_LOG(ERROR) << "TensorCopySlices requires 1 input and 1 output, but got " << inputs.size() << " input and "
@@ -111,7 +111,9 @@ void TileCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const st
   tile_parameter_.data_size_ = sizeof(T);

   if (one_dim_tile_) {
-    auto task = [&](size_t start, size_t end) { TileSimple(x_addr, y_addr, start, end, &tile_parameter_); };
+    auto task = [&x_addr, &y_addr, this](size_t start, size_t end) {
+      TileSimple(x_addr, y_addr, start, end, &tile_parameter_);
+    };
     ParallelLaunchAutoSearch(task, tile_parameter_.fast_outer_size_, this, &parallel_search_info_);
     return;
   }
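Note: the Tile change replaces the catch-all [&] capture with an explicit capture list ([&x_addr, &y_addr, this]), which documents exactly which locals and members the parallel task touches. A standalone sketch of the pattern (the parallel launcher here is a hypothetical stand-in):

#include <cstddef>
#include <functional>
#include <vector>

// Hypothetical stand-in for the framework's parallel launcher: runs the task
// over [0, total) in one chunk, purely for illustration.
void ParallelFor(const std::function<void(size_t, size_t)> &task, size_t total) {
  task(0, total);
}

// Assumes y has already been resized to x.size().
void ScaleBuffer(const std::vector<float> &x, std::vector<float> *y) {
  const float *x_addr = x.data();
  float *y_addr = y->data();
  // Explicit captures: only the two pointers are visible inside the lambda,
  // so an accidental reference to some other local is a compile-time error.
  auto task = [&x_addr, &y_addr](size_t start, size_t end) {
    for (size_t i = start; i < end; ++i) {
      y_addr[i] = x_addr[i] * 2.0f;
    }
  };
  ParallelFor(task, x.size());
}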
@@ -67,7 +67,7 @@ DEFINE_int32(image_width, 832, "image width");

 int Resize_Affine(const MSTensor &input, MSTensor *output) {
   int new_height, new_width;
-  float scale = 0.999;
+  const float scale = 0.999;
   auto imgResize = MSTensor();
   std::vector<int64_t> shape = input.Shape();
   new_height = static_cast<int>(shape[0] * scale);