!49816 AICPU SequenceAddN
Merge pull request !49816 from VectorSL/add-sequence-addn-aicpu
This commit is contained in:
commit
45068d436d
|
@ -45,6 +45,7 @@ if(EXISTS ${CMAKE_C_COMPILER} AND EXISTS ${CMAKE_CXX_COMPILER})
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/concat_offset_kernel.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/drop_out_gen_mask_kernels.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sequence_add.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sequence_addn.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sequence_add_offset.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/slice_grad_kernel.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/random_shuffle_kernel.cc
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/**
|
||||
* Copyright 2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "plugin/device/ascend/kernel/aicpu/aicpu_ops/sequence_addn.h"
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <complex>
|
||||
#include "proto/aicpu_tensor.pb.h"
|
||||
#include "common/atomic_op.h"
|
||||
#include "utils/eigen_tensor.h"
|
||||
#include "aicpu_sharder/aicpu_sharder.h"
|
||||
|
||||
namespace aicpu {
|
||||
namespace {
|
||||
std::vector<int64_t> GetShape(const ::aicpuops::TensorShape &shape) {
|
||||
std::vector<int64_t> res;
|
||||
for (int i = 0; i < shape.dim_size(); ++i) {
|
||||
res.push_back(shape.dim(i).size());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
} // namespace
|
||||
constexpr size_t kSequenceAddNInputNum = 1;
|
||||
constexpr size_t kSequenceAddNOutputNum = 1;
|
||||
constexpr auto kDim0 = 0;
|
||||
constexpr auto kDim1 = 1;
|
||||
|
||||
uint32_t SequenceAddNKernel::ParseKernelParam() {
|
||||
if (node_def_.inputs_size() != kSequenceAddNInputNum) {
|
||||
AICPU_LOGE("For 'SequenceAddN', input number must be 1, but got %d", node_def_.inputs_size());
|
||||
return kAicpuKernelStateInvalid;
|
||||
}
|
||||
|
||||
if (node_def_.outputs_size() != kSequenceAddNOutputNum) {
|
||||
AICPU_LOGE("For 'SequenceAddN', output number must be 1, but got %d", node_def_.outputs_size());
|
||||
return kAicpuKernelStateInvalid;
|
||||
}
|
||||
aicpuops::Tensor input_tensor = node_def_.inputs(0);
|
||||
input_data_type_ = static_cast<aicpuops::DataType>(input_tensor.tensor_type());
|
||||
auto input_shape = GetShape(input_tensor.tensor_shape());
|
||||
input_shapes_.push_back(input_shape);
|
||||
input_data_size_ = GetTensorMemSizeByShape(node_def_.inputs(kDim0));
|
||||
output_data_size_ = GetTensorMemSizeByShape(node_def_.outputs(kDim0));
|
||||
return kAicpuKernelStateSucess;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint32_t SequenceAddNKernel::SequenceAddNTask() {
|
||||
const auto inputs_addr = reinterpret_cast<T *>(io_addrs_[kDim0]);
|
||||
auto output_addr = reinterpret_cast<T *>(io_addrs_[kDim1]);
|
||||
auto element_num = LongToSize(input_shapes_[0][0]);
|
||||
auto element_size = output_data_size_ / sizeof(T);
|
||||
auto cp_ret = memset_s(output_addr, output_data_size_, 0x0, output_data_size_);
|
||||
if (cp_ret != EOK) {
|
||||
AICPU_LOGE("For 'SequenceAddN', memset for output error, errorno: %d, size: %d.", cp_ret, output_data_size_);
|
||||
return kAicpuKernelStateInvalid;
|
||||
}
|
||||
auto input_x_addr = inputs_addr;
|
||||
auto sequence_add_n = [this, &output_addr, &input_x_addr](size_t start, size_t end) {
|
||||
for (size_t id = start; id < end; id++) {
|
||||
AtomicAdd<T>(output_addr + id, input_x_addr[id]);
|
||||
}
|
||||
};
|
||||
const int64_t per_unit_size = element_size / std::thread::hardware_concurrency();
|
||||
for (size_t i = 0; i < element_num; i++) {
|
||||
input_x_addr = inputs_addr + i * element_size;
|
||||
ParallelFor(element_size, per_unit_size, sequence_add_n);
|
||||
}
|
||||
|
||||
return kAicpuKernelStateSucess;
|
||||
}
|
||||
|
||||
uint32_t SequenceAddNKernel::DoCompute() {
|
||||
switch (input_data_type_) {
|
||||
case aicpuops::DataType::MS_INT32:
|
||||
return SequenceAddNTask<int>();
|
||||
case aicpuops::DataType::MS_INT64:
|
||||
return SequenceAddNTask<int64_t>();
|
||||
case aicpuops::DataType::MS_FLOAT32:
|
||||
return SequenceAddNTask<float>();
|
||||
case aicpuops::DataType::MS_FLOAT64:
|
||||
return SequenceAddNTask<double>();
|
||||
case aicpuops::DataType::MS_UINT32:
|
||||
return SequenceAddNTask<uint32_t>();
|
||||
case aicpuops::DataType::MS_UINT64:
|
||||
return SequenceAddNTask<uint64_t>();
|
||||
case aicpuops::DataType::MS_FLOAT16:
|
||||
return SequenceAddNTask<Eigen::half>();
|
||||
case aicpuops::DataType::MS_COMPLEX64:
|
||||
return SequenceAddNTask<std::complex<std::float_t>>();
|
||||
case aicpuops::DataType::MS_COMPLEX128:
|
||||
return SequenceAddNTask<std::complex<std::double_t>>();
|
||||
default:
|
||||
AICPU_LOGE("SequenceAddN kernel data type [%s] not support.", input_data_type_);
|
||||
return kAicpuKernelStateInvalid;
|
||||
}
|
||||
}
|
||||
} // namespace aicpu
|
||||
|
||||
extern "C" {
|
||||
__attribute__((visibility("default"))) uint32_t SequenceAddN(void *param) {
|
||||
aicpu::SequenceAddNKernel sequence_addn_kernel;
|
||||
return sequence_addn_kernel.Compute(param);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
|
||||
#define AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include "common/kernel_base.h"
|
||||
|
||||
namespace aicpu {
|
||||
class SequenceAddNKernel : public KernelBase {
|
||||
public:
|
||||
SequenceAddNKernel() : KernelBase("SequenceAddN") {}
|
||||
~SequenceAddNKernel() = default;
|
||||
|
||||
protected:
|
||||
uint32_t ParseKernelParam() override;
|
||||
uint32_t DoCompute() override;
|
||||
template <typename T>
|
||||
uint32_t SequenceAddNTask();
|
||||
|
||||
aicpuops::DataType input_data_type_{aicpuops::DataType::MS_UNKNOWN};
|
||||
size_t input_data_size_{0};
|
||||
size_t output_data_size_{0};
|
||||
std::vector<std::vector<int64_t>> input_shapes_;
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_OPS_SEQUENCE_ADDN_KERNEL_H_
|
|
@ -107,6 +107,7 @@ constexpr auto kKLDivLoss = "KLDivLoss";
|
|||
constexpr auto kKLDivLossGrad = "KLDivLossGrad";
|
||||
constexpr auto kSampleDistortedBoundingBoxV2 = "SampleDistortedBoundingBoxV2";
|
||||
constexpr auto kSequenceAdd = "SequenceAdd";
|
||||
constexpr auto kSequenceAddN = "SequenceAddN";
|
||||
constexpr auto kSequenceAddOffset = "SequenceAddOffset";
|
||||
constexpr auto kSparseToDenseV2 = "SparseToDenseV2";
|
||||
constexpr auto kSparseSoftmaxCrossEntropyWithLogitsV2 = "SparseSoftmaxCrossEntropyWithLogitsV2";
|
||||
|
@ -297,6 +298,7 @@ const std::set<std::string> kCpuKernelBaseOps{kDropoutGenMaskOpName,
|
|||
kGatherDGradV2,
|
||||
kConcatOffset,
|
||||
kSequenceAdd,
|
||||
kSequenceAddN,
|
||||
kSequenceAddOffset,
|
||||
kSliceGrad,
|
||||
kRandomShuffle,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
* Copyright 2022-2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -42,6 +42,7 @@ bool AICpuLibSelectPass::Process(const AnfNodePtr &node) const {
|
|||
kGatherDGradV2OpName,
|
||||
kConcatOffsetOpName,
|
||||
kSequenceAddOpName,
|
||||
kSequenceAddNOpName,
|
||||
kSequenceAddOffsetOpName,
|
||||
kSliceGradOpName,
|
||||
kRandomShuffleOpName,
|
||||
|
|
|
@ -423,3 +423,4 @@ from .sparse_to_dense_v2 import _sparse_to_dense_v2_aicpu
|
|||
from .bernoulli import _bernoulli_aicpu
|
||||
from .glu_grad import _glu_grad_aicpu
|
||||
from .sspaddmm import _sspaddmm_aicpu
|
||||
from .sequence_addn import _sequence_addn_aicpu
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
# Copyright 2023 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""SequenceAddN op"""
|
||||
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
|
||||
|
||||
sequence_addn_op_info = AiCPURegOp("SequenceAddN") \
|
||||
.fusion_type("OPAQUE") \
|
||||
.input(0, "input_0", "required") \
|
||||
.output(0, "output_data", "required") \
|
||||
.dtype_format(DataType.U32_Default_Tuple, DataType.U32_Default) \
|
||||
.dtype_format(DataType.U64_Default_Tuple, DataType.U64_Default) \
|
||||
.dtype_format(DataType.I64_Default_Tuple, DataType.I64_Default) \
|
||||
.dtype_format(DataType.I32_Default_Tuple, DataType.I32_Default) \
|
||||
.dtype_format(DataType.F64_Default_Tuple, DataType.F64_Default) \
|
||||
.dtype_format(DataType.F32_Default_Tuple, DataType.F32_Default) \
|
||||
.dtype_format(DataType.F16_Default_Tuple, DataType.F16_Default) \
|
||||
.dtype_format(DataType.C64_Default_Tuple, DataType.C64_Default) \
|
||||
.dtype_format(DataType.C128_Default_Tuple, DataType.C128_Default) \
|
||||
.get_op_info()
|
||||
|
||||
|
||||
@op_info_register(sequence_addn_op_info)
|
||||
def _sequence_addn_aicpu():
|
||||
"""SequenceAddN AiCPU register"""
|
||||
return
|
|
@ -1331,3 +1331,5 @@ class DataType:
|
|||
|
||||
C64_Default = ("complex64", "DefaultFormat")
|
||||
C128_Default = ("complex128", "DefaultFormat")
|
||||
C64_Default_Tuple = ("complex64", "DefaultFormat", "tuple")
|
||||
C128_Default_Tuple = ("complex128", "DefaultFormat", "tuple")
|
||||
|
|
Loading…
Reference in New Issue