From 85f2032bf8177a103f66f18992cbf986fa721599 Mon Sep 17 00:00:00 2001 From: lilinjie Date: Thu, 22 Dec 2022 17:13:16 +0800 Subject: [PATCH] aicpu migration gp high priority --- .jenkins/check/config/filter_cppcheck.txt | 13 + .jenkins/check/config/filter_cpplint.txt | 25 ++ .jenkins/check/config/whitelizard.txt | 3 + mindspore/ccsrc/include/common/utils/utils.h | 5 + .../aicpu/aicpu_ops/cpu_kernel/CMakeLists.txt | 4 +- .../ms_kernel/adaptive_avg_pool_2d.cc | 199 +++++++++++++ .../ms_kernel/adaptive_avg_pool_2d.h | 46 +++ .../ms_kernel/adaptive_avg_pool_2d_grad.cc | 213 +++++++++++++ .../ms_kernel/adaptive_avg_pool_2d_grad.h | 32 ++ .../ms_kernel/log_matrix_determinant.cc | 170 +++++++++++ .../ms_kernel/log_matrix_determinant.h | 35 +++ .../aicpu_ops/cpu_kernel/ms_kernel/median.cc | 212 +++++++++++++ .../aicpu_ops/cpu_kernel/ms_kernel/median.h | 38 +++ .../cpu_kernel/ms_kernel/mediangrad.cc | 280 ++++++++++++++++++ .../cpu_kernel/ms_kernel/mediangrad.h | 42 +++ .../cpu_kernel/utils/eigen_tensor.cc | 21 ++ .../aicpu_ops/cpu_kernel/utils/eigen_tensor.h | 170 +++++++++++ .../optimizer/mindir/aicpu_lib_select.cc | 9 +- 18 files changed, 1514 insertions(+), 3 deletions(-) create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.cc create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.h create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.cc create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.h create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.cc create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.h create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.cc create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.h create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.cc create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.h create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.cc create mode 100644 mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.h diff --git a/.jenkins/check/config/filter_cppcheck.txt b/.jenkins/check/config/filter_cppcheck.txt index fd233351aba..a3c3bae6bbd 100644 --- a/.jenkins/check/config/filter_cppcheck.txt +++ b/.jenkins/check/config/filter_cppcheck.txt @@ -74,3 +74,16 @@ "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.cc" "syntaxError" "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.cc" "syntaxError" +# AICPU migration +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "useStlAlgorithm" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "variableScope" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "constParameter" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "constVariable" 
+"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "unreadVariable" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "redundantAssignment" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "constArgument" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "useStlAlgorithm" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "variableScope" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "constParameter" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "constVariable" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "unreadVariable" \ No newline at end of file diff --git a/.jenkins/check/config/filter_cpplint.txt b/.jenkins/check/config/filter_cpplint.txt index b9299119e6e..7ca3fcc0409 100644 --- a/.jenkins/check/config/filter_cpplint.txt +++ b/.jenkins/check/config/filter_cpplint.txt @@ -106,3 +106,28 @@ "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "build/include" "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "build/include" +# AICPU migration +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/include_subdir" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "runtime/references" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/include_what_you_use" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/indent" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/ending_newline" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "runtime/explicit" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "readability/braces" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "readability/namespace" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/braces" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/include" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/end_of_line" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "readability/casting" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/namespaces" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "build/include_subdir" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "runtime/references" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "build/include_what_you_use" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/indent" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/ending_newline" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" 
"runtime/explicit" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "readability/braces" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "readability/namespace" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/braces" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "build/include" +"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/end_of_line" \ No newline at end of file diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index 2be6a4ba510..3bcd0bb48c7 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -266,3 +266,6 @@ mindspore/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc:mindspore mindspore/mindspore/python/mindspore/ops/function/nn_func.py:conv3d mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/matmul_avx512_mask_fp32.c:GemmRowxColMaskKernelFp32 mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_cpu_kernel.cc:mindspore::kernel::CropAndResizeCpuKernelMod::LaunchKernel + +# AICPU migration +mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.cc:aicpu::MedianGradCpuKernel::MedianGradCompute \ No newline at end of file diff --git a/mindspore/ccsrc/include/common/utils/utils.h b/mindspore/ccsrc/include/common/utils/utils.h index 7f4fa7bca0d..e2690c27553 100644 --- a/mindspore/ccsrc/include/common/utils/utils.h +++ b/mindspore/ccsrc/include/common/utils/utils.h @@ -45,6 +45,8 @@ constexpr auto kAdamOpName = "Adam"; constexpr auto kAdamWeightDecayName = "AdamWeightDecay"; constexpr auto kAdaptiveMaxPool2dOpName = "AdaptiveMaxPool2d"; constexpr auto kAdaptiveMaxPool2DOpName = "AdaptiveMaxPool2D"; +constexpr auto kAdaptiveAvgPool2dOpName = "AdaptiveAvgPool2d"; +constexpr auto kAdaptiveAvgPool2dGradOpName = "AdaptiveAvgPool2dGrad"; constexpr auto kAdaptiveMaxPool3DGradOpName = "AdaptiveMaxPool3DGrad"; constexpr auto kAddNOpName = "AddN"; constexpr auto kAddOpName = "Add"; @@ -373,6 +375,7 @@ constexpr auto kLessOpName = "Less"; constexpr auto kLinSpaceOpName = "LinSpace"; constexpr auto kLinSpaceDOpName = "LinSpaceD"; constexpr auto kListDiffOpName = "ListDiff"; +constexpr auto kLogMatrixDeterminantOpName = "LogMatrixDeterminant"; constexpr auto kLogOpName = "Log"; constexpr auto kLogSoftmaxOpName = "LogSoftmax"; constexpr auto kLogSoftmaxV2OpName = "LogSoftmaxV2"; @@ -404,6 +407,8 @@ constexpr auto kMaxPoolV2OpName = "MaxPoolV2"; constexpr auto kMaxPoolExt2OpName = "MaxPoolExt2"; constexpr auto kMaxPoolWithArgmaxOpName = "MaxPoolWithArgmax"; constexpr auto kMeanGradOpName = "MeanGrad"; +constexpr auto kMedianOpName = "Median"; +constexpr auto kMedianGradOpName = "MedianGrad"; constexpr auto kMemCpyAsyncOpName = "memcpy_async"; constexpr auto kMinimumGradOpName = "MinimumGrad"; constexpr auto kMinimumOpName = "Minimum"; diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/CMakeLists.txt b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/CMakeLists.txt index a496738a3e1..920256d955c 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/CMakeLists.txt @@ -53,9 +53,9 @@ target_link_libraries(mindspore_cpu_kernels PRIVATE 
-pthread ) -set(INSTALL_LIBRARY_DIR lib) +set(INSTALL_LIBRARY_DIR lib/plugin) install(TARGETS mindspore_cpu_kernels OPTIONAL EXPORT mindspore_cpu_kernels-targets - LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/ascend ) diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.cc new file mode 100644 index 00000000000..d047c939094 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.cc @@ -0,0 +1,199 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_kernel/ms_kernel/adaptive_avg_pool_2d.h" + +#include "cpu_kernel/common/cpu_kernel_utils.h" +#include "utils/eigen_tensor.h" +#include "utils/kernel_util.h" + +using namespace std; + +namespace { +const char *kAdaptiveAvgPool2d = "AdaptiveAvgPool2d"; +constexpr uint32_t kInputNum = 1; +constexpr uint32_t kOutputNum = 1; +constexpr int64_t kParallelDataNums = 4 * 1024; +constexpr int64_t kthree = 3; +constexpr int64_t kneg_three = -3; +constexpr int64_t kfour = 4; +constexpr int64_t ktwo = 2; +constexpr int64_t kneg_two = -2; + +template +struct AdaptiveCalcArgs { + SCALAR_T *input_data = nullptr; + SCALAR_T *output_data = nullptr; + + int64_t size_b = 1; + int64_t size_d = 0; + int64_t in_size_h = 0; + int64_t in_size_w = 0; + + int64_t out_size_h = 0; + int64_t out_size_w = 0; + + int64_t in_stride_d = 0; + int64_t in_stride_h = 0; + int64_t in_stride_w = 0; +}; + +#define SWITCH_PARALLEL(SHARD, end_num, num) \ + if ((num) <= kParallelDataNums) { \ + for (size_t i = 0; i < size_t(end_num); i++) { \ + SHARD(i, i + 1); \ + } \ + } else { \ + KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, end_num, 1, SHARD), \ + "AdaptiveAvgPool2d #SHARD Compute failed."); \ + } +} // namespace + +namespace aicpu { +template +SCALAR_T ComputeSum(int64_t span_h, int64_t span_w, SCALAR_T *in_point, AdaptiveCalcArgs &args) { + SCALAR_T sum = static_cast(0.); + for (int in_h = 0; in_h < span_h; in_h++) { + for (int in_w = 0; in_w < span_w; in_w++) { + SCALAR_T val = *(in_point + in_h * args.in_stride_h + in_w * args.in_stride_w); + + sum += static_cast(val); + } + } + return sum; +} + +template +void ComputeSingleThread(int64_t start, int64_t end, AdaptiveCalcArgs args) { + for (auto d = start; d < end; d++) { + /* loop over output */ + for (int64_t out_h = 0; out_h < args.out_size_h; out_h++) { + int in_start_h = StartIndex(out_h, args.out_size_h, args.in_size_h); + int in_end_h = EndIndex(out_h, args.out_size_h, args.in_size_h); + int span_h = in_end_h - in_start_h; + + for (int64_t out_w = 0; out_w < args.out_size_w; out_w++) { + int in_start_w = StartIndex(out_w, args.out_size_w, args.in_size_w); + int in_end_w = EndIndex(out_w, args.out_size_w, args.in_size_w); + int span_w = in_end_w - in_start_w; + + // 
local pointers + SCALAR_T *in_point = + args.input_data + d * args.in_stride_d + in_start_h * args.in_stride_h + in_start_w * args.in_stride_w; + SCALAR_T *out_point = + args.output_data + d * args.out_size_h * args.out_size_w + out_h * args.out_size_w + out_w; + + /* compute local average */ + /* set output to local average */ + *out_point = SCALAR_T(ComputeSum(span_h, span_w, in_point, args) / static_cast(span_h * span_w)); + } + } + } +} + +template +uint32_t AdaptiveAvgPool2dOutFrame(const CpuKernelContext &ctx, AdaptiveCalcArgs args, int64_t num) { + auto shard_frame = [&](int64_t start, int64_t end) { ComputeSingleThread(start, end, args); }; + SWITCH_PARALLEL(shard_frame, args.size_d, num); + return KERNEL_STATUS_OK; +} + +template +uint32_t AdaptiveAvgPool2dOutTemplate(const CpuKernelContext &ctx) { + Tensor &input = *(ctx.Input(kFirstInputIndex)); + auto input_shape_ptr = input.GetTensorShape(); + int32_t input_dims = input_shape_ptr->GetDims(); + KERNEL_CHECK_NULLPTR(input_shape_ptr, KERNEL_STATUS_PARAM_INVALID, "Get input 0 shape failed."); + + KERNEL_CHECK_FALSE((input_dims == kthree || input_dims == kfour), KERNEL_STATUS_PARAM_INVALID, + "Non-empty [3D] or [4D] (batch mode) tensor expected for input 0."); + + for (int32_t i = 0; i < input_dims; i++) { + KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(i) > 0), KERNEL_STATUS_PARAM_INVALID, + "Adaptive_avg_pool2d: expected input to have non-empty spatial " + "dimensions, " + "but input 0 has sizes [%d] with dimension [%d] being empty.", + input_dims, i); + } + + AdaptiveCalcArgs args; + // sizes + std::vector input_dim_sizes = input_shape_ptr->GetDimSizes(); + args.size_d = input_dim_sizes.end()[kneg_three]; + args.in_size_h = input_dim_sizes.end()[kneg_two]; + args.in_size_w = input_dim_sizes.end()[-1]; + + // strides + args.in_stride_w = 1; + args.in_stride_h = args.in_size_w; + args.in_stride_d = args.in_stride_h * args.in_size_h; + + // output sizes + AttrValue *attr = ctx.GetAttr("output_size"); + std::vector output_size_data = attr->GetListInt(); + if (output_size_data.size() == ktwo) { + args.out_size_h = output_size_data[0] > 0 ? output_size_data[0] : input_dim_sizes.end()[-2]; + args.out_size_w = output_size_data[1] > 0 ? 
output_size_data[1] : input_dim_sizes.end()[-1]; + } else if (output_size_data.size() == 1) { + KERNEL_CHECK_FALSE((output_size_data[0] >= 0), KERNEL_STATUS_PARAM_INVALID, + "Adaptive_avg_pool2d: output_size value should be non-negative"); + args.out_size_h = output_size_data[0]; + args.out_size_w = output_size_data[0]; + } else { + KERNEL_LOG_ERROR("output_size length should be 1 OR 2, but got [%d]", output_size_data.size()); + return KERNEL_STATUS_PARAM_INVALID; + } + // indices will contain i,j locations for each output point + args.input_data = static_cast(input.GetData()); + args.output_data = static_cast(ctx.Output(kFirstOutputIndex)->GetData()); + int64_t num = input.NumElements(); + // resize output + if (input_dims == kthree) { + AdaptiveAvgPool2dOutFrame(ctx, args, num); + } else { + auto shard_template = [&](int64_t start, int64_t end) { + for (auto b = start; b < end; b++) { + AdaptiveCalcArgs sub_args = args; + sub_args.input_data = args.input_data + b * args.in_stride_d * args.size_d; + sub_args.output_data = args.output_data + b * args.size_d * args.out_size_h * args.out_size_w; + AdaptiveAvgPool2dOutFrame(ctx, sub_args, num); + } + }; + SWITCH_PARALLEL(shard_template, input_dim_sizes[0], num); + } + return KERNEL_STATUS_OK; +} + +uint32_t AdaptiveAvgPool2d::Compute(const CpuKernelContext &ctx) { + // check params + KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "[%s] check input and output number failed.", + kAdaptiveAvgPool2d); + + Tensor *input_0 = ctx.Input(kFirstInputIndex); + auto data_type = static_cast(input_0->GetDataType()); + // Compute by data_type + switch (data_type) { + case DT_FLOAT: + return AdaptiveAvgPool2dOutTemplate(ctx); + case DT_FLOAT16: + return AdaptiveAvgPool2dOutTemplate(ctx); + default: + KERNEL_LOG_ERROR("AdaptiveAvgPool2d kernel data type [%s] not support.", DTypeStr(data_type).c_str()); + return KERNEL_STATUS_PARAM_INVALID; + } + return KERNEL_STATUS_OK; +} +REGISTER_CPU_KERNEL(kAdaptiveAvgPool2d, AdaptiveAvgPool2d); +} // namespace aicpu diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.h new file mode 100644 index 00000000000..8af8e8e6405 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d.h @@ -0,0 +1,46 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_H_
+#define AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_H_
+
+#include <cmath>
+
+#include "cpu_kernel/inc/cpu_ops_kernel.h"
+#include "cpu_kernel/inc/cpu_types.h"
+
+namespace aicpu {
+class AdaptiveAvgPool2d : public CpuKernel {
+ public:
+  AdaptiveAvgPool2d() = default;
+  ~AdaptiveAvgPool2d() = default;
+  uint32_t Compute(const CpuKernelContext &ctx) override;
+};
+
+inline int StartIndex(int offset, int out_size, int in_size) {
+  if (out_size != 0) {
+    return static_cast<int>(std::floor(static_cast<float>((offset * in_size)) / out_size));
+  }
+  return 0;
+}
+
+inline int EndIndex(int offset, int out_size, int in_size) {
+  if (out_size != 0) {
+    return static_cast<int>(std::ceil(static_cast<float>(((offset + 1) * in_size)) / out_size));
+  }
+  return 0;
+}
+} // namespace aicpu
+#endif // AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_H_
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.cc
new file mode 100644
index 00000000000..2ad1057eed4
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.cc
@@ -0,0 +1,213 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.h"
+#include <cmath>
+#include "utils/eigen_tensor.h"
+#include "utils/kernel_util.h"
+#include "cpu_kernel/common/cpu_kernel_utils.h"
+
+namespace {
+const char *kAdaptiveAvgPool2dGrad = "AdaptiveAvgPool2dGrad";
+template <typename SCALAR_T>
+struct AdaptiveCalcArgs {
+  SCALAR_T *input_data = nullptr;
+  SCALAR_T *output_data = nullptr;
+
+  int64_t in_size_b = 0;
+  int64_t in_size_d = 0;
+  int64_t in_size_h = 0;
+  int64_t in_size_w = 0;
+  int64_t out_size_h = 0;
+  int64_t out_size_w = 0;
+
+  int64_t out_stride_d = 0;
+  int64_t in_stride_d = 0;
+  int64_t out_stride_h = 0;
+  int64_t in_stride_h = 0;
+};
+
+// out_size must not be zero
+inline int StartIndex(int offset, int out_size, int in_size) {
+  return (int)std::floor((float)(offset * in_size) / out_size);
+}
+
+// out_size must not be zero
+inline int EndIndex(int offset, int out_size, int in_size) {
+  return (int)std::ceil((float)((offset + 1) * in_size) / out_size);
+}
+} // namespace
+
+namespace aicpu {
+template <typename SCALAR_T>
+uint32_t AdaptiveAvgPool2dGradOutFrame(const CpuKernelContext &ctx, AdaptiveCalcArgs<SCALAR_T> args) {
+  uint32_t min_core_num = 1;
+  int64_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
+
+  int64_t total_size = args.in_size_d * args.in_size_b * args.out_size_h * args.out_size_w;
+  int64_t max_core_num_total = max_core_num;
+  if (max_core_num_total > total_size) {
+    max_core_num_total = total_size;
+  }
+  auto shard_init = [&](int64_t start, int64_t end) {
+    for (auto c = start; c < end; c++) {
+      args.output_data[c] = (SCALAR_T)0;
+    }
+  };
+  KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, total_size, total_size / max_core_num_total, shard_init),
+                      "AdaptiveAvgPool2dGrad Compute failed.");
+
+  int64_t in_size_db = args.in_size_d * args.in_size_b;
+  if (max_core_num > in_size_db) {
+    max_core_num = in_size_db;
+  }
+  // treat batch size and channels as one dimension
+  auto shard_work = [&](int64_t start, int64_t end) {
+    for (auto c = start; c < end; c++) {
+      SCALAR_T *output_offset_ptr = args.output_data + c * args.out_stride_d;
+      SCALAR_T *input_offset_ptr = args.input_data + c * args.in_stride_d;
+
+      for (int64_t ih = 0; ih < args.in_size_h; ih++) {
+        int64_t out_start_h = StartIndex(ih, args.in_size_h, args.out_size_h);
+        int64_t out_end_h = EndIndex(ih, args.in_size_h, args.out_size_h);
+        int64_t step_h = out_end_h - out_start_h;
+        for (int64_t iw = 0; iw < args.in_size_w; iw++) {
+          int64_t out_start_w = StartIndex(iw, args.in_size_w, args.out_size_w);
+          int64_t out_end_w = EndIndex(iw, args.in_size_w, args.out_size_w);
+          int64_t step_w = out_end_w - out_start_w;
+          if (step_w == 0 || step_h == 0) {
+            continue;
+          }
+          SCALAR_T grad_delta = input_offset_ptr[ih * args.in_stride_h + iw] / step_h / step_w;
+          int64_t oh = 0, ow = 0, output_size = args.out_stride_d;
+          for (oh = out_start_h; oh < out_end_h; oh++) {
+            for (ow = out_start_w; ow < out_end_w; ow++) {
+              int64_t output_idx = oh * args.out_stride_h + ow;
+              KERNEL_CHECK_FALSE_VOID((output_idx < output_size),
+                                      "Feature map output_idx [%lld] overflow output_size [%lld].", output_idx,
+                                      output_size);
+              output_offset_ptr[output_idx] += grad_delta;
+            }
+          }
+        }
+      }
+    }
+  };
+  KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, in_size_db, in_size_db / max_core_num, shard_work),
+                      "AdaptiveAvgPool2dGrad Compute failed.");
+  return KERNEL_STATUS_OK;
+}
+
+template <typename SCALAR_T>
+uint32_t AdaptiveAvgPool2dGradOutCpuTemplate(const CpuKernelContext &ctx) {
+  Tensor &input = *(ctx.Input(kFirstInputIndex));
+
+  auto input_shape_ptr = input.GetTensorShape();
+  KERNEL_CHECK_NULLPTR(input_shape_ptr, KERNEL_STATUS_PARAM_INVALID, "Get input x shape failed.");
+  int32_t input_dims = input_shape_ptr->GetDims();
+
+  for (int32_t i = 0; i < input_dims; i++) {
+    KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(i) > 0), KERNEL_STATUS_PARAM_INVALID,
+                       "Adaptive_avg_pool2d_grad: expected input to have non-empty spatial dimensions, "
+                       "but input has sizes [%d] with dimension [%d] being empty.",
+                       input_dims, i);
+  }
+
+  KERNEL_CHECK_FALSE(input_dims == 4, KERNEL_STATUS_PARAM_INVALID, "Non-empty [4D] tensor expected for input.");
+
+  AdaptiveCalcArgs<SCALAR_T> args;
+  args.in_size_b = 1;
+  args.in_size_d = 0;
+  args.in_size_h = 0;
+  args.in_size_w = 0;
+  args.out_size_h = 0;
+  args.out_size_w = 0;
+  args.out_stride_d = 1;
+  args.in_stride_d = 1;
+  args.out_stride_h = 1;
+  args.in_stride_h = 1;
+
+  std::vector<int64_t> orig_input_size = ctx.GetAttr("orig_input_shape")->GetListInt();
+  KERNEL_CHECK_FALSE((orig_input_size.size() == 4), KERNEL_STATUS_PARAM_INVALID,
+                     "Adaptive_avg_pool2d_grad: internal error, orig_input_size.size() must be [4]");
+  KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(0) == orig_input_size[0]), KERNEL_STATUS_PARAM_INVALID,
+                     "Adaptive_avg_pool2d_grad: internal error, orig_input_size Batch must equal "
+                     "input_size Batch, now orig_input_size Batch is [%lld], input_size Batch is [%lld].",
+                     orig_input_size[0], input_shape_ptr->GetDimSize(0));
+  KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(1) == orig_input_size[1]), KERNEL_STATUS_PARAM_INVALID,
+                     "Adaptive_avg_pool2d_grad: internal error, orig_input_size Channel must equal "
+                     "input_size channel, now orig_input_size Channel is [%lld], input_size Channel is [%lld].",
+                     orig_input_size[1], input_shape_ptr->GetDimSize(1));
+
+  int dim_w = 3;
+  int dim_h = 2;
+  // sizes
+  args.in_size_d = input_shape_ptr->GetDimSize(dim_h - 1);
+  args.in_size_h = input_shape_ptr->GetDimSize(dim_h);
+  args.in_size_w = input_shape_ptr->GetDimSize(dim_w);
+
+  args.out_size_h = orig_input_size[dim_h];
+  args.out_size_w = orig_input_size[dim_w];
+  KERNEL_CHECK_FALSE((args.out_size_h != 0 && args.out_size_w != 0), KERNEL_STATUS_PARAM_INVALID,
+                     "Adaptive_avg_pool2d_grad: internal error, output_size H or W can not be zero, "
+                     "now H is [%lld], W is [%lld].",
+                     args.out_size_h, args.out_size_w);
+  // strides
+  // The calculation does not overflow, because the maximum value is the number of user input elements,
+  // which is less than the int64_t range.
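  // [editor's note, not part of the original patch] A small worked example of the index
  // mapping used above, assuming a hypothetical orig_input_shape of {1, 1, 4, 4} and a
  // 2x2 pooled gradient (in_size_h = in_size_w = 2):
  //   out_start_h = floor(ih * 4 / 2), out_end_h = ceil((ih + 1) * 4 / 2)
  // so pooled cell (0, 0) spans rows/cols [0, 2) x [0, 2), giving step_h = step_w = 2,
  // and each of those four original input positions accumulates
  //   grad_delta = y_grad[0][0] / (2 * 2)
  // The stride assignments just below linearize that NCHW block: out_stride_d = H * W
  // advances one channel in the flat output buffer, out_stride_h = W advances one row.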
+ args.out_stride_d = args.out_size_h * args.out_size_w; + args.out_stride_h = args.out_size_w; + args.in_stride_d = args.in_size_h * args.in_size_w; + args.in_stride_h = args.in_size_w; + + args.input_data = static_cast(input.GetData()); + args.output_data = static_cast(ctx.Output(kFirstOutputIndex)->GetData()); + + return AdaptiveAvgPool2dGradOutFrame(ctx, args); +} + +uint32_t AdaptiveAvgPool2dGrad::Compute(const CpuKernelContext &ctx) { + Tensor *input_0 = ctx.Input(kFirstInputIndex); + KERNEL_CHECK_NULLPTR(input_0, KERNEL_STATUS_PARAM_INVALID, "Get input tensor failed."); + KERNEL_CHECK_NULLPTR(input_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input data failed."); + Tensor *output_0 = ctx.Output(kFirstOutputIndex); + KERNEL_CHECK_NULLPTR(output_0, KERNEL_STATUS_PARAM_INVALID, "Get output tensor failed."); + KERNEL_CHECK_NULLPTR(output_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output data failed."); + + AttrValue *attr_orig_input_shape = ctx.GetAttr("orig_input_shape"); + KERNEL_CHECK_NULLPTR(attr_orig_input_shape, KERNEL_STATUS_PARAM_INVALID, "[%s] get attr:orig_input_shape failed.", + kAdaptiveAvgPool2dGrad); + std::vector v_orig_input_shape = attr_orig_input_shape->GetListInt(); + + KERNEL_LOG_INFO("AdaptiveAvgPool2dGrad kernel, input[0]: size is [%llu]; output_0: size is [%llu].", + input_0->GetDataSize(), output_0->GetDataSize()); + KERNEL_LOG_INFO("[%s] get attr:orig_input_shape [%s].", kAdaptiveAvgPool2dGrad, + VectorToString(v_orig_input_shape).c_str()); + + auto data_type = static_cast(input_0->GetDataType()); + // Compute by data_type + switch (data_type) { + case DT_FLOAT: + return AdaptiveAvgPool2dGradOutCpuTemplate(ctx); + case DT_FLOAT16: + return AdaptiveAvgPool2dGradOutCpuTemplate(ctx); + default: + KERNEL_LOG_ERROR("AdaptiveAvgPool2dGrad kernel data type [%s] not support.", DTypeStr(data_type).c_str()); + return KERNEL_STATUS_PARAM_INVALID; + } +} + +REGISTER_CPU_KERNEL(kAdaptiveAvgPool2dGrad, AdaptiveAvgPool2dGrad); +} // namespace aicpu diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.h new file mode 100644 index 00000000000..4d1f6c466bf --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_GRAD_H_
+#define AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_GRAD_H_
+
+#include "cpu_kernel/inc/cpu_ops_kernel.h"
+#include "cpu_kernel/inc/cpu_types.h"
+
+namespace aicpu {
+class AdaptiveAvgPool2dGrad : public CpuKernel {
+ public:
+  AdaptiveAvgPool2dGrad() = default;
+  ~AdaptiveAvgPool2dGrad() = default;
+
+ protected:
+  uint32_t Compute(const CpuKernelContext &ctx) override;
+};
+} // namespace aicpu
+#endif // AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_GRAD_H_
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.cc
new file mode 100644
index 00000000000..c8187a5d3db
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.cc
@@ -0,0 +1,170 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "log_matrix_determinant.h"
+
+#include "Eigen/LU"
+#include "cpu_kernel_utils.h"
+#include "utils/kernel_util.h"
+
+namespace {
+const uint32_t kOutputNum = 2;
+const uint32_t kInputNum = 1;
+const uint32_t kIndexTwo = 2;
+const char *const kLogMatrixDeterminant = "LogMatrixDeterminant";
+constexpr int64_t kParallelDataNums = 8 * 1024;
+
+#define LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DTYPE, TYPE, CTX)          \
+  case (DTYPE): {                                                      \
+    uint32_t result = LogMatrixDeterminantCompute<TYPE>(CTX);          \
+    if (result != KERNEL_STATUS_OK) {                                  \
+      KERNEL_LOG_ERROR("LogMatrixDeterminant kernel compute failed."); \
+      return result;                                                   \
+    }                                                                  \
+    break;                                                             \
+  }
+} // namespace
+
+namespace aicpu {
+uint32_t LogMatrixDeterminantCpuKernel::Compute(const CpuKernelContext &ctx) {
+  KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "[%s] check input and output failed.",
+                      kLogMatrixDeterminant);
+  KERNEL_HANDLE_ERROR(LogMatrixDeterminantCheck(ctx), "[%s] check params failed.", kLogMatrixDeterminant);
+  DataType data_type = ctx.Input(0)->GetDataType();
+  switch (data_type) {
+    LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_FLOAT, float, ctx)
+    LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_DOUBLE, double, ctx)
+    LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_COMPLEX64, std::complex<float>, ctx)
+    LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_COMPLEX128, std::complex<double>, ctx)
+    default:
+      KERNEL_LOG_ERROR("LogMatrixDeterminant kernel data type [%s] not support.", DTypeStr(data_type).c_str());
+      return KERNEL_STATUS_PARAM_INVALID;
+  }
+  return KERNEL_STATUS_OK;
+}
+
+uint32_t LogMatrixDeterminantCpuKernel::LogMatrixDeterminantCheck(const CpuKernelContext &ctx) {
+  auto input_0 = ctx.Input(0);
+  auto output_0 = ctx.Output(0);
+  auto output_1 = ctx.Output(1);
+  KERNEL_CHECK_NULLPTR(input_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input x data failed.")
+  KERNEL_CHECK_NULLPTR(output_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output sign data failed.")
+ 
KERNEL_CHECK_NULLPTR(output_1->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output y data failed.") + + KERNEL_CHECK_NULLPTR(input_0->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get input x tensor shape failed.") + KERNEL_CHECK_NULLPTR(output_0->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get output sign tensor shape failed.") + KERNEL_CHECK_NULLPTR(output_1->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get output y tensor shape failed.") + std::vector shape_x = input_0->GetTensorShape()->GetDimSizes(); + std::vector shape_sign = output_0->GetTensorShape()->GetDimSizes(); + std::vector shape_y = output_1->GetTensorShape()->GetDimSizes(); + size_t shape_size_x = shape_x.size(); + size_t shape_size_sign = shape_sign.size(); + size_t shape_size_y = shape_y.size(); + KERNEL_CHECK_FALSE((shape_size_x > 1), KERNEL_STATUS_PARAM_INVALID, "Input x must be at least rank 2, got [%zu].", + shape_size_x) + KERNEL_CHECK_FALSE((shape_x[shape_size_x - 1] > 0), KERNEL_STATUS_PARAM_INVALID, + "Input x last dimension must be at least 1.") + KERNEL_CHECK_FALSE((shape_x[shape_size_x - kIndexTwo] == shape_x[shape_size_x - 1]), KERNEL_STATUS_PARAM_INVALID, + "Input x dimensions must be equal, but are [%lld] and [%lld].", shape_x[shape_size_x - kIndexTwo], + shape_x[shape_size_x - 1]) + + KERNEL_CHECK_FALSE((shape_size_sign == shape_size_x - kIndexTwo), KERNEL_STATUS_PARAM_INVALID, + "Output sign must be rank [%zu], got [%zu].", shape_size_x - kIndexTwo, shape_size_sign) + KERNEL_CHECK_FALSE((shape_size_y == shape_size_x - kIndexTwo), KERNEL_STATUS_PARAM_INVALID, + "Output y must be rank [%zu], got [%zu].", shape_size_x - kIndexTwo, shape_size_y) + for (size_t i = 0; i < shape_size_x - kIndexTwo; i++) { + KERNEL_CHECK_FALSE((shape_sign[i] == shape_x[i]), KERNEL_STATUS_PARAM_INVALID, + "Output sign and Input x dimension [%zu] must be equal, got [%lld] and [%lld].", i, + shape_sign[i], shape_x[i]) + KERNEL_CHECK_FALSE((shape_y[i] == shape_x[i]), KERNEL_STATUS_PARAM_INVALID, + "Output y and Input x dimension [%zu] must be equal, got [%lld] and [%lld].", i, shape_y[i], + shape_x[i]) + } + return KERNEL_STATUS_OK; +} + +template +uint32_t LogMatrixDeterminantCpuKernel::LogMatrixDeterminantCompute(const CpuKernelContext &ctx) { + auto input_x = reinterpret_cast(ctx.Input(0)->GetData()); + auto output_sign = reinterpret_cast(ctx.Output(0)->GetData()); + auto output_y = reinterpret_cast(ctx.Output(1)->GetData()); + + std::vector shape_x = ctx.Input(0)->GetTensorShape()->GetDimSizes(); + size_t shape_size = shape_x.size(); + int64_t m = shape_x[shape_size - 1]; + int64_t size_mm = m * m; + typedef Eigen::Matrix MartixXd; + using RealT = typename Eigen::NumTraits::Real; + if (size_mm > 0) { + int64_t martix_num = ctx.Input(0)->NumElements() / size_mm; + int64_t data_size = ctx.Input(0)->NumElements() * static_cast(sizeof(T)); + if (data_size <= kParallelDataNums) { + for (int64_t i = 0; i < martix_num; i++) { + RealT log_abs_det = 0; + T sign = 1; + Eigen::Map martix_x(input_x + i * m * m, m, m); + if (martix_x.size() > 0) { + Eigen::PartialPivLU lu(martix_x); + MartixXd LU = lu.matrixLU(); + sign = lu.permutationP().determinant(); + auto diag = LU.diagonal().array().eval(); + auto abs_diag = diag.cwiseAbs().eval(); + log_abs_det += abs_diag.log().sum(); + sign *= (diag / abs_diag).prod(); + } + if (!Eigen::numext::isfinite(log_abs_det)) { + sign = 0; + log_abs_det = log_abs_det > 0 ? 
-std::log(RealT(0)) : std::log(RealT(0)); + } + *(output_sign + i) = sign; + *(output_y + i) = log_abs_det; + } + } else { + uint32_t min_core_num = 1; + int64_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - kResvCpuNum); + if (max_core_num > martix_num) { + max_core_num = martix_num; + } + auto shard_work = [&](size_t start, size_t end) { + RealT log_abs_det = 0; + for (size_t i = start; i < end; i++) { + log_abs_det = 0; + T sign = 1; + Eigen::Map martix_x(input_x + i * m * m, m, m); + if (martix_x.size() > 0) { + Eigen::PartialPivLU lu(martix_x); + MartixXd LU = lu.matrixLU(); + sign = static_cast(lu.permutationP().determinant()); + auto diag = LU.diagonal().array().eval(); + auto abs_diag = diag.cwiseAbs().eval(); + log_abs_det += abs_diag.log().sum(); + sign *= (diag / abs_diag).prod(); + } + if (!Eigen::numext::isfinite(log_abs_det)) { + sign = 0; + log_abs_det = log_abs_det > 0 ? -std::log(RealT(0)) : std::log(RealT(0)); + } + *(output_sign + i) = sign; + *(output_y + i) = log_abs_det; + } + }; + KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, martix_num, martix_num / max_core_num, shard_work), + "LogMatrixDeterminant Compute failed."); + } + } + return KERNEL_STATUS_OK; +} +REGISTER_CPU_KERNEL(kLogMatrixDeterminant, LogMatrixDeterminantCpuKernel); +} // namespace aicpu diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.h new file mode 100644 index 00000000000..64d79357221 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/log_matrix_determinant.h @@ -0,0 +1,35 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_KERNELS_NORMALIZED_LOG_MATRIX_DETERMINANT_H +#define AICPU_KERNELS_NORMALIZED_LOG_MATRIX_DETERMINANT_H + +#include "cpu_ops_kernel.h" + +namespace aicpu { +class LogMatrixDeterminantCpuKernel : public CpuKernel { + public: + LogMatrixDeterminantCpuKernel() = default; + ~LogMatrixDeterminantCpuKernel() override = default; + uint32_t Compute(const CpuKernelContext &ctx) override; + + private: + uint32_t LogMatrixDeterminantCheck(const CpuKernelContext &ctx); + + template + uint32_t LogMatrixDeterminantCompute(const CpuKernelContext &ctx); +}; +} // namespace aicpu +#endif \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.cc new file mode 100644 index 00000000000..43abcef3260 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.cc @@ -0,0 +1,212 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "median.h" + +#include "cpu_kernel_utils.h" +#include "utils/eigen_tensor.h" +#include "utils/kernel_util.h" + +#include + +namespace { +const uint32_t kInputNum = 1; +const uint32_t kOutputNum = 2; +const char *kMedian = "Median"; + +#define MEDIAN_COMPUTE_CASE(DTYPE, TYPE, CTX) \ + case (DTYPE): { \ + uint32_t result = MedianCompute(CTX); \ + if (result != KERNEL_STATUS_OK) { \ + KERNEL_LOG_ERROR("Median kernel compute failed."); \ + return result; \ + } \ + break; \ + } + +#define GLOBAL_MEDIAN_COMPUTE_CASE(DTYPE, TYPE, CTX) \ + case (DTYPE): { \ + uint32_t result = GlobalMedianCompute(CTX); \ + if (result != KERNEL_STATUS_OK) { \ + KERNEL_LOG_ERROR("Median kernel compute failed."); \ + return result; \ + } \ + break; \ + } +} // namespace + +namespace aicpu { +uint32_t MedianCpuKernel::Compute(const CpuKernelContext &ctx) { + KERNEL_HANDLE_ERROR(MedianCheck(ctx), "Median check params failed."); + auto data_type = ctx.Input(0)->GetDataType(); + AttrValue *global_ptr = ctx.GetAttr("global_median"); + bool global_median_bool = global_ptr->GetBool(); + if (global_median_bool == false) { + switch (data_type) { + MEDIAN_COMPUTE_CASE(DT_INT16, int16_t, ctx) + MEDIAN_COMPUTE_CASE(DT_INT32, int32_t, ctx) + MEDIAN_COMPUTE_CASE(DT_INT64, int64_t, ctx) + MEDIAN_COMPUTE_CASE(DT_FLOAT, float, ctx) + MEDIAN_COMPUTE_CASE(DT_DOUBLE, double, ctx) + default: + KERNEL_LOG_ERROR("Median kernel data type [%s] not support.", DTypeStr(data_type).c_str()); + return KERNEL_STATUS_PARAM_INVALID; + } + } else { + switch (data_type) { + GLOBAL_MEDIAN_COMPUTE_CASE(DT_INT16, int16_t, ctx) + GLOBAL_MEDIAN_COMPUTE_CASE(DT_INT32, int32_t, ctx) + GLOBAL_MEDIAN_COMPUTE_CASE(DT_INT64, int64_t, ctx) + GLOBAL_MEDIAN_COMPUTE_CASE(DT_FLOAT, float, ctx) + GLOBAL_MEDIAN_COMPUTE_CASE(DT_DOUBLE, double, ctx) + default: + KERNEL_LOG_ERROR("Median kernel data type [%s] not support.", DTypeStr(data_type).c_str()); + return 
KERNEL_STATUS_PARAM_INVALID; + } + } + return KERNEL_STATUS_OK; +} + +uint32_t MedianCpuKernel::MedianCheck(const CpuKernelContext &ctx) { + auto global_median = ctx.GetAttr("global_median"); + KERNEL_CHECK_NULLPTR(global_median, KERNEL_STATUS_PARAM_INVALID, "Get attr global_median failed."); + bool global_median_value = global_median->GetBool(); + if (global_median_value == false) { + KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "Median check input and output number failed."); + auto input_shape_ptr = ctx.Input(0)->GetTensorShape(); + int64_t input_shape_dims = input_shape_ptr->GetDims(); + int64_t dim_num = 0; + AttrValue *dim_ptr = ctx.GetAttr("axis"); + if (dim_ptr != nullptr) dim_num = dim_ptr->GetInt(); + if (input_shape_dims != 0) { + KERNEL_CHECK_FALSE((dim_num >= (0 - input_shape_dims) && dim_num <= (input_shape_dims - 1)), + KERNEL_STATUS_PARAM_INVALID, + "IndexError: Dimension out of range " + "(expected to be in range of [[%lld], [%lld]], but got [%lld])", + (0 - input_shape_dims), (input_shape_dims - 1), dim_num); + } else { + KERNEL_CHECK_FALSE((dim_num >= -1 && dim_num <= 0), KERNEL_STATUS_PARAM_INVALID, + "IndexError: Dimension out of range " + "(expected to be in range of [[%lld], [%lld]], but got [%lld])", + -1, 0, dim_num); + } + } else { + Tensor *input_0 = ctx.Input(0); + KERNEL_CHECK_NULLPTR(input_0, KERNEL_STATUS_PARAM_INVALID, "Get input failed."); + KERNEL_CHECK_NULLPTR(input_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input data failed."); + Tensor *output_0 = ctx.Output(0); + KERNEL_CHECK_NULLPTR(output_0, KERNEL_STATUS_PARAM_INVALID, "Get output_0 failed."); + KERNEL_CHECK_NULLPTR(output_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output data 0 failed."); + } + if (global_median_value == false) { + KERNEL_LOG_DEBUG( + "MedianCpuKernel[%s], input0: size[%llu];" + "output0: size[%llu], output1: size[%llu].", + ctx.GetOpType().c_str(), ctx.Input(0)->GetDataSize(), ctx.Output(0)->GetDataSize(), ctx.Output(1)->GetDataSize()); + } else { + KERNEL_LOG_DEBUG( + "MedianCpuKernel[%s], input0: size[%llu];" + "output0: size[%llu].", + ctx.GetOpType().c_str(), ctx.Input(0)->GetDataSize(), ctx.Output(0)->GetDataSize()); + } + + return KERNEL_STATUS_OK; +} + +template +uint32_t MedianCpuKernel::GlobalMedianCompute(const CpuKernelContext &ctx) { + auto input_x0 = reinterpret_cast(ctx.Input(0)->GetData()); + auto output_y0 = reinterpret_cast(ctx.Output(0)->GetData()); + size_t data_num = ctx.Input(0)->GetTensorShape()->NumElements(); + const int64_t half = 2; + std::nth_element(input_x0, input_x0 + static_cast((data_num - 1) / half), input_x0 + data_num); + *output_y0 = *(input_x0 + static_cast((data_num - 1) / half)); + return KERNEL_STATUS_OK; +} + +template +uint32_t MedianCpuKernel::MedianCompute(const CpuKernelContext &ctx) { + auto input_x0 = reinterpret_cast(ctx.Input(0)->GetData()); + auto output_y0 = reinterpret_cast(ctx.Output(0)->GetData()); + auto output_y1 = reinterpret_cast(ctx.Output(1)->GetData()); + + auto input_shape_ptr = ctx.Input(0)->GetTensorShape(); + int64_t input_shape_dims = input_shape_ptr->GetDims(); + if (input_shape_dims == 0) { + *output_y0 = *input_x0; + *output_y1 = 0; + + return KERNEL_STATUS_OK; + } + + int64_t dim_num = 0; + AttrValue *dim_ptr = ctx.GetAttr("axis"); + if (dim_ptr != nullptr) { + dim_num = dim_ptr->GetInt(); + } + if (dim_num < 0) { + dim_num += input_shape_dims; + } + auto input_shape_0 = ctx.Input(0)->GetTensorShape()->GetDimSizes(); + int64_t dim_data_num = input_shape_0[dim_num]; + T *temp_median_vec 
= new T[dim_data_num]; + int64_t *temp_median_index_vec = new int64_t[dim_data_num]; + int64_t group = 1; + int64_t jump = 1; + + int64_t median_pos = static_cast((dim_data_num - 1) / 2); + + if (dim_num != 0) { + for (int64_t i = 0; i < dim_num; i++) { + group *= input_shape_0[i]; + } + } + if (dim_num != input_shape_dims - 1) { + for (int64_t i = dim_num + 1; i < input_shape_dims; i++) { + jump *= input_shape_0[i]; + } + } + + T *start = input_x0; + for (int64_t i = 0; i < group; i++) { + for (int64_t j = 0; j < jump; j++) { + for (int64_t k = 0; k < dim_data_num; k++) { + auto num_index = start + k * jump + j; + temp_median_index_vec[k] = k; + temp_median_vec[k] = *num_index; + } + std::nth_element(temp_median_index_vec, temp_median_index_vec + median_pos, temp_median_index_vec + dim_data_num, + [&temp_median_vec, dim_data_num](int64_t pos1, int64_t pos2) { + return (*(temp_median_vec + pos1) < *(temp_median_vec + pos2)) || + (pos1 >= 0 && pos1 < dim_data_num && + *(temp_median_vec + pos1) == *(temp_median_vec + pos2) && pos1 < pos2); + }); + std::nth_element(temp_median_vec, temp_median_vec + median_pos, temp_median_vec + dim_data_num); + *(output_y0 + i * jump + j) = *(temp_median_vec + median_pos); + *(output_y1 + i * jump + j) = *(temp_median_index_vec + median_pos); + } + if (i != group - 1) { + start += jump * dim_data_num; + } + }; + + delete[] temp_median_vec; + delete[] temp_median_index_vec; + + return KERNEL_STATUS_OK; +} +REGISTER_CPU_KERNEL(kMedian, MedianCpuKernel); +} // namespace aicpu diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.h new file mode 100644 index 00000000000..5677bd0fad9 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/median.h @@ -0,0 +1,38 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef AICPU_KERNELS_NORMALIZED_MEDIAN_H_ +#define AICPU_KERNELS_NORMALIZED_MEDIAN_H_ + +#include "cpu_ops_kernel.h" +#include "utils/bcast.h" + +namespace aicpu { +class MedianCpuKernel : public CpuKernel { + public: + MedianCpuKernel() = default; + ~MedianCpuKernel() override = default; + uint32_t Compute(const CpuKernelContext &ctx) override; + + private: + uint32_t MedianCheck(const CpuKernelContext &ctx); + template + uint32_t GlobalMedianCompute(const CpuKernelContext &ctx); + template + uint32_t MedianCompute(const CpuKernelContext &ctx); +}; +} // namespace aicpu +#endif diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.cc new file mode 100644 index 00000000000..5a82abe033e --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.cc @@ -0,0 +1,280 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mediangrad.h" + +#include "cpu_kernel_utils.h" +#include "utils/eigen_tensor.h" +#include "utils/kernel_util.h" + +namespace { +const char *kMedianGrad = "MedianGrad"; +const uint32_t kOutputNum = 1; +const uint32_t kInputNum = 4; +const uint32_t kGlobalOutputNum = 1; +const uint32_t kGlobalInputNum = 3; +// when input data size is more than kParallelDataNum, use Parallel func +const int64_t kParallelDataNum = 2 * 1024; +const int64_t kParallelDataNumMid = 16 * 1024; + +#define MEDIANGRAD_COMPUTE_CASE(DTYPE, TYPE, TYPE2, CTX) \ + case (DTYPE): { \ + uint32_t result = MedianGradCompute(CTX); \ + if (result != KERNEL_STATUS_OK) { \ + KERNEL_LOG_ERROR("MedianGrad kernel compute failed."); \ + return result; \ + } \ + break; \ + } + +#define GLOBALMEDIANGRAD_COMPUTE_CASE(DTYPE, TYPE, TYPE2, CTX) \ + case (DTYPE): { \ + uint32_t result = GlobalMedianGradCompute(CTX); \ + if (result != KERNEL_STATUS_OK) { \ + KERNEL_LOG_ERROR("GlobalMedianGrad kernel compute failed."); \ + return result; \ + } \ + break; \ + } +} // namespace + +namespace aicpu { +uint32_t MedianGradCpuKernel::Compute(const CpuKernelContext &ctx) { + // check params + KERNEL_HANDLE_ERROR(MedianGradParamCheck(ctx), "MedianGrad check params failed."); + auto data_type_x = ctx.Input(1)->GetDataType(); + AttrValue *global_median_ptr = ctx.GetAttr("global_median"); + bool global_median = global_median_ptr->GetBool(); + if (global_median == false) { + switch (data_type_x) { + MEDIANGRAD_COMPUTE_CASE(DT_INT16, int16_t, float, ctx) + MEDIANGRAD_COMPUTE_CASE(DT_INT32, int32_t, float, ctx) + MEDIANGRAD_COMPUTE_CASE(DT_INT64, int64_t, float, ctx) + MEDIANGRAD_COMPUTE_CASE(DT_FLOAT, float, float, ctx) + MEDIANGRAD_COMPUTE_CASE(DT_DOUBLE, double, double, ctx) + default: + KERNEL_LOG_ERROR("MedianGrad kernel data type [%s] of input x not support.", DTypeStr(data_type_x).c_str()); + return KERNEL_STATUS_PARAM_INVALID; + } + } else { + switch 
(data_type_x) { + GLOBALMEDIANGRAD_COMPUTE_CASE(DT_INT16, int16_t, float, ctx) + GLOBALMEDIANGRAD_COMPUTE_CASE(DT_INT32, int32_t, float, ctx) + GLOBALMEDIANGRAD_COMPUTE_CASE(DT_INT64, int64_t, float, ctx) + GLOBALMEDIANGRAD_COMPUTE_CASE(DT_FLOAT, float, float, ctx) + GLOBALMEDIANGRAD_COMPUTE_CASE(DT_DOUBLE, double, double, ctx) + default: + KERNEL_LOG_ERROR("GlobalMedianGrad kernel data type [%s] of input x not support.", + DTypeStr(data_type_x).c_str()); + return KERNEL_STATUS_PARAM_INVALID; + } + } + return KERNEL_STATUS_OK; +} + +uint32_t MedianGradCpuKernel::MedianGradParamCheck(const CpuKernelContext &ctx) { + auto global_median_ptr = ctx.GetAttr("global_median"); + KERNEL_CHECK_NULLPTR(global_median_ptr, KERNEL_STATUS_PARAM_INVALID, "Get attr global_median failed."); + bool global_median = global_median_ptr->GetBool(); + + if (global_median == false) { + KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "MedianGrad check input and output number failed."); + } else { + KERNEL_HANDLE_ERROR(NormalCheck(ctx, kGlobalInputNum, kGlobalOutputNum), + "GlobalMedianGrad check input and output number failed."); + } + + Tensor *input_y_grad = ctx.Input(0); + Tensor *input_x = ctx.Input(1); + Tensor *input_y = ctx.Input(2); + Tensor *output_x_grad = ctx.Output(0); + + int64_t y_grad_num = ctx.Input(0)->GetTensorShape()->NumElements(); + int64_t y_num = ctx.Input(2)->GetTensorShape()->NumElements(); + KERNEL_CHECK_FALSE((y_num == y_grad_num), KERNEL_STATUS_PARAM_INVALID, + "The data num of input y_grad [%llu] is different from y [%llu].", y_grad_num, y_num) + auto data_type_x = ctx.Input(1)->GetDataType(); + auto data_type_y_grad = ctx.Input(0)->GetDataType(); + KERNEL_CHECK_FALSE((data_type_y_grad == data_type_x), KERNEL_STATUS_PARAM_INVALID, + "The data type of input y_grad [%s] is different from x [%s].", DTypeStr(data_type_y_grad).c_str(), + DTypeStr(data_type_x).c_str()) + + if (global_median == false) { + Tensor *input_indices = ctx.Input(3); + KERNEL_LOG_DEBUG( + "MedianGradCpuKernel[%s], input_y_grad: size[%llu]," + "input_x: size[%llu], input_y: size[%llu]," + "input_indices: size[%llu], output_x_grad: size[%llu].", + ctx.GetOpType().c_str(), input_y_grad->GetDataSize(), input_x->GetDataSize(), input_y->GetDataSize(), + input_indices->GetDataSize(), output_x_grad->GetDataSize()); + } else { + KERNEL_LOG_DEBUG( + "MedianGradCpuKernel[%s], input_y_grad: size[%llu]," + "input_x: size[%llu], input_y: size[%llu]," + "output_x_grad: size[%llu].", + ctx.GetOpType().c_str(), input_y_grad->GetDataSize(), input_x->GetDataSize(), input_y->GetDataSize(), + output_x_grad->GetDataSize()); + } + + return KERNEL_STATUS_OK; +} + +template +uint32_t MedianGradCpuKernel::GlobalMedianGradCompute(const CpuKernelContext &ctx) { + auto y_grad = reinterpret_cast(ctx.Input(0)->GetData()); + auto x = reinterpret_cast(ctx.Input(1)->GetData()); + auto y = reinterpret_cast(ctx.Input(2)->GetData()); + auto x_grad = reinterpret_cast(ctx.Output(0)->GetData()); + int64_t output_data_num = ctx.Output(0)->NumElements(); + int64_t input_data_num = ctx.Input(1)->NumElements(); + + T2 count_repeat = 0; + for (int64_t i = 0; i < input_data_num; i++) { + count_repeat += (*(x + i) == *y) ? 
1 : 0; + } + + if (output_data_num >= kParallelDataNum) { + uint32_t min_core_num = 1; + uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2); + + if (output_data_num <= kParallelDataNumMid) { + max_core_num = std::min(max_core_num, 4U); // up to 4 cpu cores + } + + if (max_core_num > output_data_num) { + max_core_num = output_data_num; + } + + auto sharder_mediangrad = [&](int64_t start, int64_t end) { + for (int64_t i = start; i < end; i++) { + *(x_grad + i) = (*(x + i) == *y) ? (*y_grad / count_repeat) : 0; + } + }; + KERNEL_HANDLE_ERROR( + CpuKernelUtils::ParallelFor(ctx, output_data_num, output_data_num / max_core_num, sharder_mediangrad), + "MedianGrad Compute failed."); + } else { + for (int64_t i = 0; i < output_data_num; i++) { + *(x_grad + i) = (*(x + i) == *y) ? (*y_grad / count_repeat) : 0; + } + } + return KERNEL_STATUS_OK; +} + +template +uint32_t MedianGradCpuKernel::MedianGradCompute(const CpuKernelContext &ctx) { + auto y_grad = reinterpret_cast(ctx.Input(0)->GetData()); + auto indices = reinterpret_cast(ctx.Input(3)->GetData()); + auto x_grad = reinterpret_cast(ctx.Output(0)->GetData()); + int64_t output_data_num = ctx.Output(0)->NumElements(); + int64_t need_calculate_num = ctx.Input(0)->NumElements(); + + for (int64_t i = 0; i < output_data_num; i++) { + *(x_grad + i) = 0; + } + + AttrValue *axis_ptr = ctx.GetAttr("axis"); + int64_t axis = axis_ptr == nullptr ? 0 : axis_ptr->GetInt(); + + std::vector shape_x = ctx.Input(1)->GetTensorShape()->GetDimSizes(); + std::vector shape_y = ctx.Input(2)->GetTensorShape()->GetDimSizes(); + + std::vector shape_keepdim; + int64_t dim_num_x = ctx.Input(1)->GetTensorShape()->GetDims(); + axis = axis >= 0 ? axis : axis + dim_num_x; + for (int64_t i = 0; i < dim_num_x; i++) { + if (i == axis) { + shape_keepdim.push_back(1); + } else { + shape_keepdim.push_back(shape_x[i]); + } + } + + std::vector element_num_each_dim_x; + std::vector element_num_each_dim_y; + int64_t element_num_y = 1; + int64_t element_num_x = 1; + for (int64_t i = shape_keepdim.size() - 1; i >= 0; i--) { + element_num_each_dim_x.insert(element_num_each_dim_x.begin(), element_num_x); + element_num_x *= shape_x[i]; + element_num_each_dim_y.insert(element_num_each_dim_y.begin(), element_num_y); + element_num_y *= shape_keepdim[i]; + } + + if (need_calculate_num >= kParallelDataNum) { + uint32_t min_core_num = 1; + uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2); + + if (need_calculate_num <= kParallelDataNumMid) { + max_core_num = std::min(max_core_num, 4U); // up to 4 cpu cores + } + + if (max_core_num > need_calculate_num) { + max_core_num = need_calculate_num; + } + + auto sharder_mediangrad = [&](int64_t start, int64_t end) { + std::vector dim_vec; + for (int64_t i = 0; i < dim_num_x; i++) { + dim_vec.push_back(0); + } + for (int64_t nth_element = start; nth_element < end; nth_element++) { + int64_t elements_remain = nth_element; + for (int64_t i = 0; i < dim_num_x; i++) { + dim_vec[i] = elements_remain / element_num_each_dim_y[i]; + elements_remain %= element_num_each_dim_y[i]; + } + int64_t update_element_pos = 0; + for (int64_t i = 0; i < dim_num_x; i++) { + if (i == axis) { + update_element_pos += *(indices + nth_element) * element_num_each_dim_x[i]; + } else { + update_element_pos += dim_vec[i] * element_num_each_dim_x[i]; + } + } + *(x_grad + update_element_pos) = *(y_grad + nth_element); + } + }; + KERNEL_HANDLE_ERROR( + CpuKernelUtils::ParallelFor(ctx, need_calculate_num, 
+
+template <typename T1, typename T2>
+uint32_t MedianGradCpuKernel::MedianGradCompute(const CpuKernelContext &ctx) {
+  auto y_grad = reinterpret_cast<T1 *>(ctx.Input(0)->GetData());
+  auto indices = reinterpret_cast<int64_t *>(ctx.Input(3)->GetData());
+  auto x_grad = reinterpret_cast<T2 *>(ctx.Output(0)->GetData());
+  int64_t output_data_num = ctx.Output(0)->NumElements();
+  int64_t need_calculate_num = ctx.Input(0)->NumElements();
+
+  for (int64_t i = 0; i < output_data_num; i++) {
+    *(x_grad + i) = 0;
+  }
+
+  AttrValue *axis_ptr = ctx.GetAttr("axis");
+  int64_t axis = axis_ptr == nullptr ? 0 : axis_ptr->GetInt();
+
+  std::vector<int64_t> shape_x = ctx.Input(1)->GetTensorShape()->GetDimSizes();
+  std::vector<int64_t> shape_y = ctx.Input(2)->GetTensorShape()->GetDimSizes();
+
+  std::vector<int64_t> shape_keepdim;
+  int64_t dim_num_x = ctx.Input(1)->GetTensorShape()->GetDims();
+  axis = axis >= 0 ? axis : axis + dim_num_x;
+  for (int64_t i = 0; i < dim_num_x; i++) {
+    if (i == axis) {
+      shape_keepdim.push_back(1);
+    } else {
+      shape_keepdim.push_back(shape_x[i]);
+    }
+  }
+
+  std::vector<int64_t> element_num_each_dim_x;
+  std::vector<int64_t> element_num_each_dim_y;
+  int64_t element_num_y = 1;
+  int64_t element_num_x = 1;
+  for (int64_t i = shape_keepdim.size() - 1; i >= 0; i--) {
+    element_num_each_dim_x.insert(element_num_each_dim_x.begin(), element_num_x);
+    element_num_x *= shape_x[i];
+    element_num_each_dim_y.insert(element_num_each_dim_y.begin(), element_num_y);
+    element_num_y *= shape_keepdim[i];
+  }
+
+  if (need_calculate_num >= kParallelDataNum) {
+    uint32_t min_core_num = 1;
+    uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
+
+    if (need_calculate_num <= kParallelDataNumMid) {
+      max_core_num = std::min(max_core_num, 4U);  // up to 4 cpu cores
+    }
+
+    if (max_core_num > need_calculate_num) {
+      max_core_num = need_calculate_num;
+    }
+
+    auto sharder_mediangrad = [&](int64_t start, int64_t end) {
+      std::vector<int64_t> dim_vec;
+      for (int64_t i = 0; i < dim_num_x; i++) {
+        dim_vec.push_back(0);
+      }
+      for (int64_t nth_element = start; nth_element < end; nth_element++) {
+        int64_t elements_remain = nth_element;
+        for (int64_t i = 0; i < dim_num_x; i++) {
+          dim_vec[i] = elements_remain / element_num_each_dim_y[i];
+          elements_remain %= element_num_each_dim_y[i];
+        }
+        int64_t update_element_pos = 0;
+        for (int64_t i = 0; i < dim_num_x; i++) {
+          if (i == axis) {
+            update_element_pos += *(indices + nth_element) * element_num_each_dim_x[i];
+          } else {
+            update_element_pos += dim_vec[i] * element_num_each_dim_x[i];
+          }
+        }
+        *(x_grad + update_element_pos) = *(y_grad + nth_element);
+      }
+    };
+    KERNEL_HANDLE_ERROR(
+      CpuKernelUtils::ParallelFor(ctx, need_calculate_num, need_calculate_num / max_core_num, sharder_mediangrad),
+      "MedianGrad Compute failed.");
+  } else {
+    std::vector<int64_t> dim_vec;
+    for (int64_t i = 0; i < dim_num_x; i++) {
+      dim_vec.push_back(0);
+    }
+    for (int64_t nth_element = 0; nth_element < need_calculate_num; nth_element++) {
+      int64_t elements_remain = nth_element;
+      for (int64_t i = 0; i < dim_num_x; i++) {
+        dim_vec[i] = elements_remain / element_num_each_dim_y[i];
+        elements_remain %= element_num_each_dim_y[i];
+      }
+      int64_t update_element_pos = 0;
+      for (int64_t i = 0; i < dim_num_x; i++) {
+        if (i == axis) {
+          update_element_pos += *(indices + nth_element) * element_num_each_dim_x[i];
+        } else {
+          update_element_pos += dim_vec[i] * element_num_each_dim_x[i];
+        }
+      }
+      *(x_grad + update_element_pos) = *(y_grad + nth_element);
+    }
+  }
+  return KERNEL_STATUS_OK;
+}
+
+REGISTER_CPU_KERNEL(kMedianGrad, MedianGradCpuKernel);
+}  // namespace aicpu
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.h
new file mode 100644
index 00000000000..70239e5734a
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AICPU_KERNELS_NORMALIZED_MEDIANGRAD_H_
+#define AICPU_KERNELS_NORMALIZED_MEDIANGRAD_H_
+
+#include "cpu_ops_kernel.h"
+#include "utils/bcast.h"
+
+namespace aicpu {
+class MedianGradCpuKernel : public CpuKernel {
+ public:
+  MedianGradCpuKernel() = default;
+  ~MedianGradCpuKernel() override = default;
+
+ protected:
+  uint32_t Compute(const CpuKernelContext &ctx) override;
+
+ private:
+  uint32_t MedianGradParamCheck(const CpuKernelContext &ctx);
+
+  template <typename T1, typename T2>
+  uint32_t MedianGradCompute(const CpuKernelContext &ctx);
+
+  template <typename T1, typename T2>
+  uint32_t GlobalMedianGradCompute(const CpuKernelContext &ctx);
+};
+}  // namespace aicpu
+#endif
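For the non-global path, MedianGradCompute above rebuilds, for every position in the keepdim-shaped y, the offset of the element in x that produced the median. That stride walk can be restated in isolation as follows; the function and parameter names are illustrative, but the logic mirrors the kernel's:

    #include <cstdint>
    #include <vector>

    // Illustrative restatement of MedianGradCompute's offset arithmetic: decompose a
    // flat position in the keepdim-shaped y into per-dimension coordinates, then
    // rebuild an offset into x with the coordinate on `axis` replaced by the
    // recorded median index.
    int64_t MapToInputOffset(int64_t nth_element, int64_t median_index, int64_t axis,
                             const std::vector<int64_t> &strides_y,    // element_num_each_dim_y
                             const std::vector<int64_t> &strides_x) {  // element_num_each_dim_x
      int64_t offset = 0;
      int64_t remain = nth_element;
      for (size_t i = 0; i < strides_y.size(); ++i) {
        const int64_t coord = remain / strides_y[i];  // coordinate along dimension i
        remain %= strides_y[i];
        offset += (static_cast<int64_t>(i) == axis ? median_index : coord) * strides_x[i];
      }
      return offset;
    }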
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.cc
new file mode 100644
index 00000000000..a930d8c7fe4
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.cc
@@ -0,0 +1,21 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "eigen_tensor.h"
+
+namespace aicpu {
+const Tensor *EigenTensor::GetTensor() const { return tensor_; }
+}  // namespace aicpu
\ No newline at end of file
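The header added next wraps a raw tensor buffer in Eigen TensorMap views. A hypothetical usage sketch, assuming the include path below resolves and `t` is a live 2 x 3 float tensor supplied by the framework:

    #include "utils/eigen_tensor.h"  // include path is an assumption

    // Hypothetical usage: view an existing buffer as rank-2 and rank-1 Eigen maps.
    void FillExample(aicpu::Tensor *t) {
      aicpu::EigenTensor et(t, t->GetData());
      auto mat = et.matrix<float>();  // rank-2 Eigen::TensorMap over t's memory
      mat(0, 0) = 1.0f;               // writes through to the underlying tensor
      auto vec = et.flat<float>();    // the same memory viewed as rank-1
      vec(5) = 2.0f;                  // last element of a 2 x 3 tensor
    }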
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.h b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.h
new file mode 100644
index 00000000000..a151025319d
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/eigen_tensor.h
@@ -0,0 +1,170 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AICPU_EIGENTENSOR_H
+#define AICPU_EIGENTENSOR_H
+
+#include "cpu_tensor.h"
+#include "kernel_log.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace aicpu {
+// Helper to define Tensor types given that the scalar is of type T.
+template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
+struct TTypes {
+  // Rank-<NDIMS> tensor of scalar type T.
+  typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned> Tensor;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstTensor;
+
+  // Unaligned Rank-<NDIMS> tensor of scalar type T.
+  typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType> > UnalignedTensor;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType> > UnalignedConstTensor;
+
+  typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, int>, Eigen::Aligned> Tensor32Bit;
+
+  // Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
+  typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
+    Scalar;
+  typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
+    ConstScalar;
+
+  // Unaligned Scalar tensor of scalar type T.
+  typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType> > UnalignedScalar;
+  typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType> >
+    UnalignedConstScalar;
+
+  // Rank-1 tensor (vector) of scalar type T.
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Flat;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstFlat;
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Vec;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstVec;
+
+  // Unaligned Rank-1 tensor (vector) of scalar type T.
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType> > UnalignedFlat;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType> > UnalignedConstFlat;
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType> > UnalignedVec;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType> > UnalignedConstVec;
+
+  // Rank-2 tensor (matrix) of scalar type T.
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned> Matrix;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstMatrix;
+
+  // Unaligned Rank-2 tensor (matrix) of scalar type T.
+  typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType> > UnalignedMatrix;
+  typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType> > UnalignedConstMatrix;
+};
+}  // namespace aicpu
+
+namespace aicpu {
+
+class EigenTensor {
+ public:
+  EigenTensor() = delete;
+  EigenTensor(Tensor *tensor, void *data) : tensor_(tensor), tensor_data_(data) {}
+  ~EigenTensor() = default;
+
+  /*
+   * Get tensor
+   * @return succ: tensor, error: nullptr
+   */
+  const Tensor *GetTensor() const;
+
+  /*
+   * Eigen vec
+   * @return Eigen vec
+   */
+  template <typename T>
+  typename TTypes<T>::Vec vec() {
+    return tensor<T, 1>();
+  }
+
+  /*
+   * Eigen matrix
+   * @return Eigen matrix
+   */
+  template <typename T>
+  typename TTypes<T>::Matrix matrix() {
+    return tensor<T, 2>();
+  }
+
+  /*
+   * Eigen ConstMatrix
+   * @return Eigen ConstMatrix
+   */
+  template <typename T>
+  typename TTypes<T>::ConstMatrix matrix() const {
+    return tensor<T, 2>();
+  }
+
+  /*
+   * Eigen tensor
+   * @return Eigen tensor
+   */
+  template <typename T, size_t NDIMS>
+  typename TTypes<T, NDIMS>::Tensor tensor() {
+    return typename TTypes<T, NDIMS>::Tensor(reinterpret_cast<T *>(tensor_data_), AsEigenDSizes<NDIMS>());
+  }
+
+  /*
+   * Eigen ConstTensor
+   * @return Eigen ConstTensor
+   */
+  template <typename T, size_t NDIMS>
+  typename TTypes<T, NDIMS>::ConstTensor tensor() const {
+    return typename TTypes<T, NDIMS>::ConstTensor(reinterpret_cast<const T *>(tensor_data_), AsEigenDSizes<NDIMS>());
+  }
+
+  /*
+   * Eigen Flat
+   * @return Eigen Flat
+   */
+  template <typename T>
+  typename TTypes<T>::Flat flat() {
+    return typename TTypes<T>::Flat(reinterpret_cast<T *>(tensor_data_), {tensor_->GetTensorShape()->NumElements()});
+  }
+
+  /*
+   * Fill an Eigen::DSizes from the tensor shape; if NDIMS exceeds the actual rank,
+   * the remaining sizes are padded with 1.
+   * @return Eigen::DSizes: the shape, padded with 1 up to NDIMS
+   */
+  template <size_t NDIMS, typename IndexType = Eigen::DenseIndex>
+  Eigen::DSizes<IndexType, NDIMS> AsEigenDSizesWithPadding() const {
+    Eigen::DSizes<IndexType, NDIMS> dsizes;
+    for (int d = 0; d < tensor_->GetTensorShape()->GetDims(); d++) {
+      dsizes[d] = static_cast<IndexType>(tensor_->GetTensorShape()->GetDimSize(d));
+    }
+    for (int d = tensor_->GetTensorShape()->GetDims(); d < NDIMS; d++) {
+      dsizes[d] = 1;
+    }
+    return dsizes;
+  }
+
+  /*
+   * Fill `*dsizes` from `*this`
+   * @return Eigen::DSizes: the tensor shape as Eigen::DSizes
+   */
+  template <size_t NDIMS, typename IndexType = Eigen::DenseIndex>
+  Eigen::DSizes<IndexType, NDIMS> AsEigenDSizes() const {
+    return AsEigenDSizesWithPadding<NDIMS, IndexType>();
+  }
+
+ private:
+  Tensor *tensor_;
+  void *tensor_data_;
+};
+}  // namespace aicpu
+
+#endif  // AICPU_EIGENTENSOR_H
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc
index b703d574d96..c9972658d9b 100644
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/aicpu_lib_select.cc
@@ -48,7 +48,14 @@ const AnfNodePtr AICpuLibSelectPass::Process(const FuncGraphPtr &graph, const An
                                                       kSliceGradOpName,
                                                       kRandomShuffleOpName,
                                                       kRangeOpName};
-  static const std::set<std::string> kMigrateAicpuKernelOps = {kACosOpName};
+  static const std::set<std::string> kMigrateAicpuKernelOps = {
+    mindspore::kACosOpName,
+    mindspore::kLogMatrixDeterminantOpName,
+    mindspore::kAdaptiveAvgPool2dOpName,
+    mindspore::kAdaptiveAvgPool2dGradOpName,
+    mindspore::kMedianOpName,
+    mindspore::kMedianGradOpName,
+  };
   static const std::string kEnvOpSoNames = "mindspore_aicpu_kernels";
   static const std::string kCpuKernelSoName = "mindspore_cpu_kernels";
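The Process() body around this hunk is outside the diff, so the following is only a hedged sketch of the selection the new set presumably feeds: ops listed in kMigrateAicpuKernelOps resolve to the migrated cpu-kernels library, everything else to the default environment library. The function name and signature are illustrative, not the pass's actual code:

    #include <set>
    #include <string>

    // Hedged sketch of the library routing this hunk configures: membership in the
    // migrated-op set selects "mindspore_cpu_kernels" over the default so name.
    std::string SelectAicpuSo(const std::string &op_name, const std::set<std::string> &migrated_ops) {
      static const std::string kEnvOpSoNames = "mindspore_aicpu_kernels";
      static const std::string kCpuKernelSoName = "mindspore_cpu_kernels";
      return migrated_ops.count(op_name) != 0 ? kCpuKernelSoName : kEnvOpSoNames;
    }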