add sequenceaddn for aicpu

This commit is contained in:
VectorSL 2023-03-06 10:19:37 +08:00
parent 1bfe24bd98
commit 679cfdb85b
8 changed files with 204 additions and 1 deletions

View File

@ -45,6 +45,7 @@ if(EXISTS ${CMAKE_C_COMPILER} AND EXISTS ${CMAKE_CXX_COMPILER})
${CMAKE_CURRENT_SOURCE_DIR}/concat_offset_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/drop_out_gen_mask_kernels.cc
${CMAKE_CURRENT_SOURCE_DIR}/sequence_add.cc
${CMAKE_CURRENT_SOURCE_DIR}/sequence_addn.cc
${CMAKE_CURRENT_SOURCE_DIR}/sequence_add_offset.cc
${CMAKE_CURRENT_SOURCE_DIR}/slice_grad_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/random_shuffle_kernel.cc

View File

@ -0,0 +1,117 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.h"
#include <string>
#include <thread>
#include <complex>
#include "proto/aicpu_tensor.pb.h"
#include "common/atomic_op.h"
#include "utils/eigen_tensor.h"
#include "aicpu_sharder/aicpu_sharder.h"
namespace aicpu {
namespace {
// Flattens a protobuf TensorShape into a vector that holds the size of every
// dimension, in declaration order. A shape with no dimensions yields an empty vector.
std::vector<int64_t> GetShape(const ::aicpuops::TensorShape &shape) {
  const int rank = shape.dim_size();
  std::vector<int64_t> res;
  // The rank is known before the loop, so allocate once instead of letting
  // push_back grow the buffer repeatedly.
  res.reserve(rank > 0 ? static_cast<size_t>(rank) : 0);
  for (int i = 0; i < rank; ++i) {
    res.push_back(shape.dim(i).size());
  }
  return res;
}
} // namespace
// Number of input tensors the SequenceAddN node definition must carry.
constexpr size_t kSequenceAddNInputNum{1};
// Number of output tensors the SequenceAddN node definition must carry.
constexpr size_t kSequenceAddNOutputNum{1};
// Despite the "Dim" naming, these two are used in this file as slot indices:
// kDim0 selects the first input/output tensor and the first io address,
// kDim1 selects the second io address (the output buffer).
constexpr int kDim0{0};
constexpr int kDim1{1};
/**
 * Validates the node definition and caches what the compute step needs.
 *
 * Checks that the node has exactly one input and one output, then records the
 * input data type, the input shape and the byte sizes of the input and output
 * tensors.
 *
 * @return kAicpuKernelStateSucess on success; kAicpuKernelStateInvalid when the
 *         input/output count is wrong or the input shape has no dimensions.
 */
uint32_t SequenceAddNKernel::ParseKernelParam() {
  // The generated size accessors return int; cast explicitly to avoid a
  // signed/unsigned comparison against the size_t constants.
  if (static_cast<size_t>(node_def_.inputs_size()) != kSequenceAddNInputNum) {
    AICPU_LOGE("For 'SequenceAddN', input number must be 1, but got %d", node_def_.inputs_size());
    return kAicpuKernelStateInvalid;
  }
  if (static_cast<size_t>(node_def_.outputs_size()) != kSequenceAddNOutputNum) {
    AICPU_LOGE("For 'SequenceAddN', output number must be 1, but got %d", node_def_.outputs_size());
    return kAicpuKernelStateInvalid;
  }

  // Bind by const reference: only a few fields are read, so copying the whole
  // tensor message is unnecessary.
  const aicpuops::Tensor &input_tensor = node_def_.inputs(kDim0);
  input_data_type_ = static_cast<aicpuops::DataType>(input_tensor.tensor_type());

  const auto input_shape = GetShape(input_tensor.tensor_shape());
  // The compute step reads the leading dimension (input_shapes_[0][0]) as the
  // number of sequence elements, so a rank-0 shape must be rejected here.
  if (input_shape.empty()) {
    AICPU_LOGE("For 'SequenceAddN', input must have at least one dimension, but got rank %d",
               input_tensor.tensor_shape().dim_size());
    return kAicpuKernelStateInvalid;
  }
  // Drop anything left from a previous parse so index 0 always refers to the
  // shape parsed by this call.
  input_shapes_.clear();
  input_shapes_.push_back(input_shape);

  input_data_size_ = GetTensorMemSizeByShape(input_tensor);
  output_data_size_ = GetTensorMemSizeByShape(node_def_.outputs(kDim0));
  return kAicpuKernelStateSucess;
}
template <typename T>
uint32_t SequenceAddNKernel::SequenceAddNTask() {
const auto inputs_addr = reinterpret_cast<T *>(io_addrs_[kDim0]);
auto output_addr = reinterpret_cast<T *>(io_addrs_[kDim1]);
auto element_num = LongToSize(input_shapes_[0][0]);
auto element_size = output_data_size_ / sizeof(T);
auto cp_ret = memset_s(output_addr, output_data_size_, 0x0, output_data_size_);
if (cp_ret != EOK) {
AICPU_LOGE("For 'SequenceAddN', memset for output error, errorno: %d, size: %d.", cp_ret, output_data_size_);
return kAicpuKernelStateInvalid;
}
auto input_x_addr = inputs_addr;
auto sequence_add_n = [this, &output_addr, &input_x_addr](size_t start, size_t end) {
for (size_t id = start; id < end; id++) {
AtomicAdd<T>(output_addr + id, input_x_addr[id]);
}
};
const int64_t per_unit_size = element_size / std::thread::hardware_concurrency();
for (size_t i = 0; i < element_num; i++) {
input_x_addr = inputs_addr + i * element_size;
ParallelFor(element_size, per_unit_size, sequence_add_n);
}
return kAicpuKernelStateSucess;
}
/**
 * Dispatches to the SequenceAddNTask instantiation that matches the parsed
 * input data type.
 *
 * @return the result of the selected task, or kAicpuKernelStateInvalid when the
 *         data type is not one of the supported cases below.
 */
uint32_t SequenceAddNKernel::DoCompute() {
  switch (input_data_type_) {
    case aicpuops::DataType::MS_INT32:
      return SequenceAddNTask<int32_t>();
    case aicpuops::DataType::MS_INT64:
      return SequenceAddNTask<int64_t>();
    case aicpuops::DataType::MS_FLOAT32:
      return SequenceAddNTask<float>();
    case aicpuops::DataType::MS_FLOAT64:
      return SequenceAddNTask<double>();
    case aicpuops::DataType::MS_UINT32:
      return SequenceAddNTask<uint32_t>();
    case aicpuops::DataType::MS_UINT64:
      return SequenceAddNTask<uint64_t>();
    case aicpuops::DataType::MS_FLOAT16:
      return SequenceAddNTask<Eigen::half>();
    // Use float/double directly: std::float_t and std::double_t are only
    // guaranteed to be "at least as wide" and may be wider types on some
    // platforms, which would not match 64/128-bit complex storage.
    case aicpuops::DataType::MS_COMPLEX64:
      return SequenceAddNTask<std::complex<float>>();
    case aicpuops::DataType::MS_COMPLEX128:
      return SequenceAddNTask<std::complex<double>>();
    default:
      // The data type is an enum value, not a C string; print it as an integer
      // (passing it to %s is undefined behaviour).
      AICPU_LOGE("SequenceAddN kernel data type [%d] not support.", static_cast<int>(input_data_type_));
      return kAicpuKernelStateInvalid;
  }
}
} // namespace aicpu
extern "C" {
// C-linkage entry point exported from the library: builds a fresh
// SequenceAddNKernel and forwards the opaque parameter block to its Compute().
__attribute__((visibility("default"))) uint32_t SequenceAddN(void *param) {
  return aicpu::SequenceAddNKernel().Compute(param);
}
}

View File

@ -0,0 +1,41 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
#define AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
#include <vector>
#include <random>
#include "common/kernel_base.h"
namespace aicpu {
// AICPU kernel registered under the name "SequenceAddN". The implementation
// sums the slices of a single packed input tensor element-wise into one output
// tensor.
class SequenceAddNKernel : public KernelBase {
public:
// Passes the kernel name "SequenceAddN" to the KernelBase constructor.
SequenceAddNKernel() : KernelBase("SequenceAddN") {}
~SequenceAddNKernel() = default;
protected:
// Checks the input/output count of the node definition and caches the input
// data type, input shape and tensor byte sizes in the members below.
uint32_t ParseKernelParam() override;
// Selects the SequenceAddNTask<T> instantiation matching input_data_type_.
uint32_t DoCompute() override;
// Typed worker: zeroes the output buffer and accumulates every input slice
// into it. T is the element type of the buffers.
template <typename T>
uint32_t SequenceAddNTask();
// Data type of the input tensor; stays MS_UNKNOWN until ParseKernelParam runs.
aicpuops::DataType input_data_type_{aicpuops::DataType::MS_UNKNOWN};
// Result of GetTensorMemSizeByShape for the input tensor.
size_t input_data_size_{0};
// Result of GetTensorMemSizeByShape for the output tensor; used as the byte
// count when clearing the output and to derive the per-slice element count.
size_t output_data_size_{0};
// Parsed input shapes; entry 0 is the shape of the single input tensor and its
// leading dimension is read as the number of slices to add.
std::vector<std::vector<int64_t>> input_shapes_;
};
} // namespace aicpu
#endif // AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_

View File

@ -105,6 +105,7 @@ constexpr auto kKLDivLoss = "KLDivLoss";
constexpr auto kKLDivLossGrad = "KLDivLossGrad";
constexpr auto kSampleDistortedBoundingBoxV2 = "SampleDistortedBoundingBoxV2";
constexpr auto kSequenceAdd = "SequenceAdd";
constexpr auto kSequenceAddN = "SequenceAddN";
constexpr auto kSequenceAddOffset = "SequenceAddOffset";
constexpr auto kSparseToDenseV2 = "SparseToDenseV2";
constexpr auto kSparseSoftmaxCrossEntropyWithLogitsV2 = "SparseSoftmaxCrossEntropyWithLogitsV2";
@ -292,6 +293,7 @@ const std::set<std::string> kCpuKernelBaseOps{kDropoutGenMaskOpName,
kGatherDGradV2,
kConcatOffset,
kSequenceAdd,
kSequenceAddN,
kSequenceAddOffset,
kSliceGrad,
kRandomShuffle,

View File

@ -1,5 +1,5 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
* Copyright 2022-2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -42,6 +42,7 @@ bool AICpuLibSelectPass::Process(const AnfNodePtr &node) const {
kGatherDGradV2OpName,
kConcatOffsetOpName,
kSequenceAddOpName,
kSequenceAddNOpName,
kSequenceAddOffsetOpName,
kSliceGradOpName,
kRandomShuffleOpName,

View File

@ -420,3 +420,4 @@ from .sparse_to_dense_v2 import _sparse_to_dense_v2_aicpu
from .bernoulli import _bernoulli_aicpu
from .glu_grad import _glu_grad_aicpu
from .sspaddmm import _sspaddmm_aicpu
from .sequence_addn import _sequence_addn_aicpu

View File

@ -0,0 +1,38 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""SequenceAddN op"""
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
# AiCPU registration info for SequenceAddN: one required tuple input, one
# required output, and one dtype_format entry per supported element type
# (tuple input of that type -> output of the same type).
sequence_addn_op_info = (
    AiCPURegOp("SequenceAddN")
    .fusion_type("OPAQUE")
    .input(0, "input_0", "required")
    .output(0, "output_data", "required")
    .dtype_format(DataType.U32_Default_Tuple, DataType.U32_Default)
    .dtype_format(DataType.U64_Default_Tuple, DataType.U64_Default)
    .dtype_format(DataType.I64_Default_Tuple, DataType.I64_Default)
    .dtype_format(DataType.I32_Default_Tuple, DataType.I32_Default)
    .dtype_format(DataType.F64_Default_Tuple, DataType.F64_Default)
    .dtype_format(DataType.F32_Default_Tuple, DataType.F32_Default)
    .dtype_format(DataType.F16_Default_Tuple, DataType.F16_Default)
    .dtype_format(DataType.C64_Default_Tuple, DataType.C64_Default)
    .dtype_format(DataType.C128_Default_Tuple, DataType.C128_Default)
    .get_op_info()
)
@op_info_register(sequence_addn_op_info)
def _sequence_addn_aicpu():
    """Register the SequenceAddN operator info for AiCPU; the body does nothing."""

View File

@ -1331,3 +1331,5 @@ class DataType:
C64_Default = ("complex64", "DefaultFormat")
C128_Default = ("complex128", "DefaultFormat")
C64_Default_Tuple = ("complex64", "DefaultFormat", "tuple")
C128_Default_Tuple = ("complex128", "DefaultFormat", "tuple")