diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/CMakeLists.txt b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/CMakeLists.txt index e0773ca70d9..043d8ac122f 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/CMakeLists.txt @@ -45,6 +45,7 @@ if(EXISTS ${CMAKE_C_COMPILER} AND EXISTS ${CMAKE_CXX_COMPILER}) ${CMAKE_CURRENT_SOURCE_DIR}/concat_offset_kernel.cc ${CMAKE_CURRENT_SOURCE_DIR}/drop_out_gen_mask_kernels.cc ${CMAKE_CURRENT_SOURCE_DIR}/sequence_add.cc + ${CMAKE_CURRENT_SOURCE_DIR}/sequence_addn.cc ${CMAKE_CURRENT_SOURCE_DIR}/sequence_add_offset.cc ${CMAKE_CURRENT_SOURCE_DIR}/slice_grad_kernel.cc ${CMAKE_CURRENT_SOURCE_DIR}/random_shuffle_kernel.cc diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.cc new file mode 100644 index 00000000000..1034b07ef8e --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.cc @@ -0,0 +1,117 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.h"
+#include <complex>
+#include <thread>
+#include <vector>
+#include "proto/aicpu_tensor.pb.h"
+#include "common/atomic_op.h"
+#include "utils/eigen_tensor.h"
+#include "aicpu_sharder/aicpu_sharder.h"
+
+namespace aicpu {
+namespace {
+std::vector<int64_t> GetShape(const ::aicpuops::TensorShape &shape) {
+  std::vector<int64_t> res;
+  for (int i = 0; i < shape.dim_size(); ++i) {
+    res.push_back(shape.dim(i).size());
+  }
+  return res;
+}
+}  // namespace
+constexpr size_t kSequenceAddNInputNum = 1;
+constexpr size_t kSequenceAddNOutputNum = 1;
+constexpr auto kDim0 = 0;
+constexpr auto kDim1 = 1;
+
+uint32_t SequenceAddNKernel::ParseKernelParam() {
+  if (node_def_.inputs_size() != kSequenceAddNInputNum) {
+    AICPU_LOGE("For 'SequenceAddN', input number must be 1, but got %d", node_def_.inputs_size());
+    return kAicpuKernelStateInvalid;
+  }
+
+  if (node_def_.outputs_size() != kSequenceAddNOutputNum) {
+    AICPU_LOGE("For 'SequenceAddN', output number must be 1, but got %d", node_def_.outputs_size());
+    return kAicpuKernelStateInvalid;
+  }
+  aicpuops::Tensor input_tensor = node_def_.inputs(0);
+  input_data_type_ = static_cast<aicpuops::DataType>(input_tensor.tensor_type());
+  auto input_shape = GetShape(input_tensor.tensor_shape());
+  input_shapes_.push_back(input_shape);
+  input_data_size_ = GetTensorMemSizeByShape(node_def_.inputs(kDim0));
+  output_data_size_ = GetTensorMemSizeByShape(node_def_.outputs(kDim0));
+  return kAicpuKernelStateSucess;
+}
+
+template <typename T>
+uint32_t SequenceAddNKernel::SequenceAddNTask() {
+  const auto inputs_addr = reinterpret_cast<T *>(io_addrs_[kDim0]);
+  auto output_addr = reinterpret_cast<T *>(io_addrs_[kDim1]);
+  auto element_num = LongToSize(input_shapes_[0][0]);
+  auto element_size = output_data_size_ / sizeof(T);
+  auto cp_ret = memset_s(output_addr, output_data_size_, 0x0, output_data_size_);
+  if (cp_ret != EOK) {
+    AICPU_LOGE("For 'SequenceAddN', memset for output error, errorno: %d, size: %d.", cp_ret, output_data_size_);
+    return
 kAicpuKernelStateInvalid;
+  }
+  auto input_x_addr = inputs_addr;
+  auto sequence_add_n = [this, &output_addr, &input_x_addr](size_t start, size_t end) {
+    for (size_t id = start; id < end; id++) {
+      AtomicAdd<T>(output_addr + id, input_x_addr[id]);
+    }
+  };
+  const int64_t per_unit_size = element_size / std::thread::hardware_concurrency();
+  for (size_t i = 0; i < element_num; i++) {
+    input_x_addr = inputs_addr + i * element_size;
+    ParallelFor(element_size, per_unit_size, sequence_add_n);
+  }
+
+  return kAicpuKernelStateSucess;
+}
+
+uint32_t SequenceAddNKernel::DoCompute() {
+  switch (input_data_type_) {
+    case aicpuops::DataType::MS_INT32:
+      return SequenceAddNTask<int32_t>();
+    case aicpuops::DataType::MS_INT64:
+      return SequenceAddNTask<int64_t>();
+    case aicpuops::DataType::MS_FLOAT32:
+      return SequenceAddNTask<float>();
+    case aicpuops::DataType::MS_FLOAT64:
+      return SequenceAddNTask<double>();
+    case aicpuops::DataType::MS_UINT32:
+      return SequenceAddNTask<uint32_t>();
+    case aicpuops::DataType::MS_UINT64:
+      return SequenceAddNTask<uint64_t>();
+    case aicpuops::DataType::MS_FLOAT16:
+      return SequenceAddNTask<Eigen::half>();
+    case aicpuops::DataType::MS_COMPLEX64:
+      return SequenceAddNTask<std::complex<float>>();
+    case aicpuops::DataType::MS_COMPLEX128:
+      return SequenceAddNTask<std::complex<double>>();
+    default:
+      AICPU_LOGE("SequenceAddN kernel data type [%s] not support.", input_data_type_);
+      return kAicpuKernelStateInvalid;
+  }
+}
+}  // namespace aicpu
+
+extern "C" {
+__attribute__((visibility("default"))) uint32_t SequenceAddN(void *param) {
+  aicpu::SequenceAddNKernel sequence_addn_kernel;
+  return sequence_addn_kernel.Compute(param);
+}
+}
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.h
new file mode 100644
index 00000000000..b768d29069c
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache
License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
+#define AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
+
+#include <vector>
+#include <string>
+#include "common/kernel_base.h"
+
+namespace aicpu {
+class SequenceAddNKernel : public KernelBase {
+ public:
+  SequenceAddNKernel() : KernelBase("SequenceAddN") {}
+  ~SequenceAddNKernel() = default;
+
+ protected:
+  uint32_t ParseKernelParam() override;
+  uint32_t DoCompute() override;
+  template <typename T>
+  uint32_t SequenceAddNTask();
+
+  aicpuops::DataType input_data_type_{aicpuops::DataType::MS_UNKNOWN};
+  size_t input_data_size_{0};
+  size_t output_data_size_{0};
+  std::vector<std::vector<int64_t>> input_shapes_;
+};
+}  // namespace aicpu
+#endif  // AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_util.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_util.h
index 69a8f5e6857..7d9c1600cc8 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_util.h
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_util.h
@@ -107,6 +107,7 @@ constexpr auto kKLDivLoss = "KLDivLoss";
 constexpr auto kKLDivLossGrad = "KLDivLossGrad";
 constexpr auto kSampleDistortedBoundingBoxV2 = "SampleDistortedBoundingBoxV2";
 constexpr auto kSequenceAdd = "SequenceAdd";
+constexpr auto kSequenceAddN = "SequenceAddN";
 constexpr auto kSequenceAddOffset = "SequenceAddOffset";
 constexpr auto kSparseToDenseV2 = "SparseToDenseV2";
 constexpr auto kSparseSoftmaxCrossEntropyWithLogitsV2 =
"SparseSoftmaxCrossEntropyWithLogitsV2";
@@ -297,6 +298,7 @@ const std::set<std::string> kCpuKernelBaseOps{kDropoutGenMaskOpName,
                                               kGatherDGradV2,
                                               kConcatOffset,
                                               kSequenceAdd,
+                                              kSequenceAddN,
                                               kSequenceAddOffset,
                                               kSliceGrad,
                                               kRandomShuffle,
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc
index f865843277a..1bafbe91823 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2022 Huawei Technologies Co., Ltd
+ * Copyright 2022-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@ bool AICpuLibSelectPass::Process(const AnfNodePtr &node) const {
                                         kGatherDGradV2OpName,
                                         kConcatOffsetOpName,
                                         kSequenceAddOpName,
+                                        kSequenceAddNOpName,
                                         kSequenceAddOffsetOpName,
                                         kSliceGradOpName,
                                         kRandomShuffleOpName,
diff --git a/mindspore/python/mindspore/ops/_op_impl/aicpu/__init__.py b/mindspore/python/mindspore/ops/_op_impl/aicpu/__init__.py
index bd8bbb409ff..f3cf049f791 100644
--- a/mindspore/python/mindspore/ops/_op_impl/aicpu/__init__.py
+++ b/mindspore/python/mindspore/ops/_op_impl/aicpu/__init__.py
@@ -423,3 +423,4 @@ from .sparse_to_dense_v2 import _sparse_to_dense_v2_aicpu
 from .bernoulli import _bernoulli_aicpu
 from .glu_grad import _glu_grad_aicpu
 from .sspaddmm import _sspaddmm_aicpu
+from .sequence_addn import _sequence_addn_aicpu
diff --git a/mindspore/python/mindspore/ops/_op_impl/aicpu/sequence_addn.py b/mindspore/python/mindspore/ops/_op_impl/aicpu/sequence_addn.py
new file mode 100644
index 00000000000..8efe4375001
--- /dev/null
+++ b/mindspore/python/mindspore/ops/_op_impl/aicpu/sequence_addn.py
@@ -0,0 +1,38 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License,
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""SequenceAddN op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +sequence_addn_op_info = AiCPURegOp("SequenceAddN") \ + .fusion_type("OPAQUE") \ + .input(0, "input_0", "required") \ + .output(0, "output_data", "required") \ + .dtype_format(DataType.U32_Default_Tuple, DataType.U32_Default) \ + .dtype_format(DataType.U64_Default_Tuple, DataType.U64_Default) \ + .dtype_format(DataType.I64_Default_Tuple, DataType.I64_Default) \ + .dtype_format(DataType.I32_Default_Tuple, DataType.I32_Default) \ + .dtype_format(DataType.F64_Default_Tuple, DataType.F64_Default) \ + .dtype_format(DataType.F32_Default_Tuple, DataType.F32_Default) \ + .dtype_format(DataType.F16_Default_Tuple, DataType.F16_Default) \ + .dtype_format(DataType.C64_Default_Tuple, DataType.C64_Default) \ + .dtype_format(DataType.C128_Default_Tuple, DataType.C128_Default) \ + .get_op_info() + + +@op_info_register(sequence_addn_op_info) +def _sequence_addn_aicpu(): + """SequenceAddN AiCPU register""" + return diff --git a/mindspore/python/mindspore/ops/op_info_register.py b/mindspore/python/mindspore/ops/op_info_register.py index 594d09ddaf4..8e0565d4688 100644 --- a/mindspore/python/mindspore/ops/op_info_register.py +++ b/mindspore/python/mindspore/ops/op_info_register.py @@ -1331,3 +1331,5 @@ class DataType: C64_Default = ("complex64", "DefaultFormat") C128_Default = ("complex128", 
"DefaultFormat") + C64_Default_Tuple = ("complex64", "DefaultFormat", "tuple") + C128_Default_Tuple = ("complex128", "DefaultFormat", "tuple")