aicpu migration gp high priority

lilinjie 2022-12-22 17:13:16 +08:00
parent 9f6e5709fa
commit 85f2032bf8
18 changed files with 1514 additions and 3 deletions

View File

@ -74,3 +74,16 @@
"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/op_proto/add_dsl.cc" "syntaxError"
"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/op_proto/matmul_tik.cc" "syntaxError"
# AICPU migration
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "useStlAlgorithm"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "variableScope"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "constParameter"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "constVariable"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "unreadVariable"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "redundantAssignment"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "constArgument"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "useStlAlgorithm"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "variableScope"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "constParameter"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "constVariable"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "unreadVariable"

View File

@ -106,3 +106,28 @@
"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "build/include"
"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "build/include"
# AICPU migration
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/include_subdir"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "runtime/references"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/include_what_you_use"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/indent"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/ending_newline"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "runtime/explicit"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "readability/braces"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "readability/namespace"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/braces"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/include"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "whitespace/end_of_line"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "readability/casting"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/" "build/namespaces"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "build/include_subdir"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "runtime/references"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "build/include_what_you_use"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/indent"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/ending_newline"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "runtime/explicit"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "readability/braces"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "readability/namespace"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/braces"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "build/include"
"mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/utils/" "whitespace/end_of_line"

View File

@ -266,3 +266,6 @@ mindspore/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc:mindspore
mindspore/mindspore/python/mindspore/ops/function/nn_func.py:conv3d
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/matmul_avx512_mask_fp32.c:GemmRowxColMaskKernelFp32
mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/crop_and_resize_cpu_kernel.cc:mindspore::kernel::CropAndResizeCpuKernelMod::LaunchKernel
# AICPU migration
mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/mediangrad.cc:aicpu::MedianGradCpuKernel::MedianGradCompute

View File

@ -45,6 +45,8 @@ constexpr auto kAdamOpName = "Adam";
constexpr auto kAdamWeightDecayName = "AdamWeightDecay";
constexpr auto kAdaptiveMaxPool2dOpName = "AdaptiveMaxPool2d";
constexpr auto kAdaptiveMaxPool2DOpName = "AdaptiveMaxPool2D";
constexpr auto kAdaptiveAvgPool2dOpName = "AdaptiveAvgPool2d";
constexpr auto kAdaptiveAvgPool2dGradOpName = "AdaptiveAvgPool2dGrad";
constexpr auto kAdaptiveMaxPool3DGradOpName = "AdaptiveMaxPool3DGrad";
constexpr auto kAddNOpName = "AddN";
constexpr auto kAddOpName = "Add";
@ -373,6 +375,7 @@ constexpr auto kLessOpName = "Less";
constexpr auto kLinSpaceOpName = "LinSpace";
constexpr auto kLinSpaceDOpName = "LinSpaceD";
constexpr auto kListDiffOpName = "ListDiff";
constexpr auto kLogMatrixDeterminantOpName = "LogMatrixDeterminant";
constexpr auto kLogOpName = "Log";
constexpr auto kLogSoftmaxOpName = "LogSoftmax";
constexpr auto kLogSoftmaxV2OpName = "LogSoftmaxV2";
@ -404,6 +407,8 @@ constexpr auto kMaxPoolV2OpName = "MaxPoolV2";
constexpr auto kMaxPoolExt2OpName = "MaxPoolExt2";
constexpr auto kMaxPoolWithArgmaxOpName = "MaxPoolWithArgmax";
constexpr auto kMeanGradOpName = "MeanGrad";
constexpr auto kMedianOpName = "Median";
constexpr auto kMedianGradOpName = "MedianGrad";
constexpr auto kMemCpyAsyncOpName = "memcpy_async";
constexpr auto kMinimumGradOpName = "MinimumGrad";
constexpr auto kMinimumOpName = "Minimum";

View File

@ -53,9 +53,9 @@ target_link_libraries(mindspore_cpu_kernels PRIVATE
-pthread
)
set(INSTALL_LIBRARY_DIR lib)
set(INSTALL_LIBRARY_DIR lib/plugin)
install(TARGETS mindspore_cpu_kernels OPTIONAL
EXPORT mindspore_cpu_kernels-targets
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/ascend
)

View File

@ -0,0 +1,199 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cpu_kernel/ms_kernel/adaptive_avg_pool_2d.h"
#include "cpu_kernel/common/cpu_kernel_utils.h"
#include "utils/eigen_tensor.h"
#include "utils/kernel_util.h"
using namespace std;
namespace {
const char *kAdaptiveAvgPool2d = "AdaptiveAvgPool2d";
constexpr uint32_t kInputNum = 1;
constexpr uint32_t kOutputNum = 1;
constexpr int64_t kParallelDataNums = 4 * 1024;
constexpr int64_t kthree = 3;
constexpr int64_t kneg_three = -3;
constexpr int64_t kfour = 4;
constexpr int64_t ktwo = 2;
constexpr int64_t kneg_two = -2;
template <typename SCALAR_T>
struct AdaptiveCalcArgs {
SCALAR_T *input_data = nullptr;
SCALAR_T *output_data = nullptr;
int64_t size_b = 1;
int64_t size_d = 0;
int64_t in_size_h = 0;
int64_t in_size_w = 0;
int64_t out_size_h = 0;
int64_t out_size_w = 0;
int64_t in_stride_d = 0;
int64_t in_stride_h = 0;
int64_t in_stride_w = 0;
};
#define SWITCH_PARALLEL(SHARD, end_num, num) \
if ((num) <= kParallelDataNums) { \
for (size_t i = 0; i < size_t(end_num); i++) { \
SHARD(i, i + 1); \
} \
} else { \
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, end_num, 1, SHARD), \
"AdaptiveAvgPool2d #SHARD Compute failed."); \
}
} // namespace
namespace aicpu {
template <typename SCALAR_T>
SCALAR_T ComputeSum(int64_t span_h, int64_t span_w, SCALAR_T *in_point, AdaptiveCalcArgs<SCALAR_T> &args) {
SCALAR_T sum = static_cast<SCALAR_T>(0.);
for (int in_h = 0; in_h < span_h; in_h++) {
for (int in_w = 0; in_w < span_w; in_w++) {
SCALAR_T val = *(in_point + in_h * args.in_stride_h + in_w * args.in_stride_w);
sum += static_cast<SCALAR_T>(val);
}
}
return sum;
}
template <typename SCALAR_T>
void ComputeSingleThread(int64_t start, int64_t end, AdaptiveCalcArgs<SCALAR_T> args) {
for (auto d = start; d < end; d++) {
/* loop over output */
for (int64_t out_h = 0; out_h < args.out_size_h; out_h++) {
int in_start_h = StartIndex(out_h, args.out_size_h, args.in_size_h);
int in_end_h = EndIndex(out_h, args.out_size_h, args.in_size_h);
int span_h = in_end_h - in_start_h;
for (int64_t out_w = 0; out_w < args.out_size_w; out_w++) {
int in_start_w = StartIndex(out_w, args.out_size_w, args.in_size_w);
int in_end_w = EndIndex(out_w, args.out_size_w, args.in_size_w);
int span_w = in_end_w - in_start_w;
// local pointers
SCALAR_T *in_point =
args.input_data + d * args.in_stride_d + in_start_h * args.in_stride_h + in_start_w * args.in_stride_w;
SCALAR_T *out_point =
args.output_data + d * args.out_size_h * args.out_size_w + out_h * args.out_size_w + out_w;
/* compute local average */
/* set output to local average */
*out_point = SCALAR_T(ComputeSum(span_h, span_w, in_point, args) / static_cast<SCALAR_T>(span_h * span_w));
}
}
}
}
template <typename SCALAR_T>
uint32_t AdaptiveAvgPool2dOutFrame(const CpuKernelContext &ctx, AdaptiveCalcArgs<SCALAR_T> args, int64_t num) {
auto shard_frame = [&](int64_t start, int64_t end) { ComputeSingleThread(start, end, args); };
SWITCH_PARALLEL(shard_frame, args.size_d, num);
return KERNEL_STATUS_OK;
}
template <typename SCALAR_T>
uint32_t AdaptiveAvgPool2dOutTemplate(const CpuKernelContext &ctx) {
Tensor &input = *(ctx.Input(kFirstInputIndex));
auto input_shape_ptr = input.GetTensorShape();
KERNEL_CHECK_NULLPTR(input_shape_ptr, KERNEL_STATUS_PARAM_INVALID, "Get input 0 shape failed.");
int32_t input_dims = input_shape_ptr->GetDims();
KERNEL_CHECK_FALSE((input_dims == kthree || input_dims == kfour), KERNEL_STATUS_PARAM_INVALID,
"Non-empty [3D] or [4D] (batch mode) tensor expected for input 0.");
for (int32_t i = 0; i < input_dims; i++) {
KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(i) > 0), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d: expected input to have non-empty spatial "
"dimensions, "
"but input 0 has sizes [%d] with dimension [%d] being empty.",
input_dims, i);
}
AdaptiveCalcArgs<SCALAR_T> args;
// sizes
std::vector<int64_t> input_dim_sizes = input_shape_ptr->GetDimSizes();
args.size_d = input_dim_sizes.end()[kneg_three];
args.in_size_h = input_dim_sizes.end()[kneg_two];
args.in_size_w = input_dim_sizes.end()[-1];
// strides
args.in_stride_w = 1;
args.in_stride_h = args.in_size_w;
args.in_stride_d = args.in_stride_h * args.in_size_h;
// output sizes
AttrValue *attr = ctx.GetAttr("output_size");
KERNEL_CHECK_NULLPTR(attr, KERNEL_STATUS_PARAM_INVALID, "Get attr output_size failed.");
std::vector<int64_t> output_size_data = attr->GetListInt();
if (output_size_data.size() == ktwo) {
args.out_size_h = output_size_data[0] > 0 ? output_size_data[0] : input_dim_sizes.end()[-2];
args.out_size_w = output_size_data[1] > 0 ? output_size_data[1] : input_dim_sizes.end()[-1];
} else if (output_size_data.size() == 1) {
KERNEL_CHECK_FALSE((output_size_data[0] >= 0), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d: output_size value should be non-negative");
args.out_size_h = output_size_data[0];
args.out_size_w = output_size_data[0];
} else {
KERNEL_LOG_ERROR("output_size length should be 1 OR 2, but got [%d]", output_size_data.size());
return KERNEL_STATUS_PARAM_INVALID;
}
// indices will contain i,j locations for each output point
args.input_data = static_cast<SCALAR_T *>(input.GetData());
args.output_data = static_cast<SCALAR_T *>(ctx.Output(kFirstOutputIndex)->GetData());
int64_t num = input.NumElements();
// resize output
if (input_dims == kthree) {
AdaptiveAvgPool2dOutFrame<SCALAR_T>(ctx, args, num);
} else {
auto shard_template = [&](int64_t start, int64_t end) {
for (auto b = start; b < end; b++) {
AdaptiveCalcArgs<SCALAR_T> sub_args = args;
sub_args.input_data = args.input_data + b * args.in_stride_d * args.size_d;
sub_args.output_data = args.output_data + b * args.size_d * args.out_size_h * args.out_size_w;
AdaptiveAvgPool2dOutFrame<SCALAR_T>(ctx, sub_args, num);
}
};
SWITCH_PARALLEL(shard_template, input_dim_sizes[0], num);
}
return KERNEL_STATUS_OK;
}
uint32_t AdaptiveAvgPool2d::Compute(const CpuKernelContext &ctx) {
// check params
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "[%s] check input and output number failed.",
kAdaptiveAvgPool2d);
Tensor *input_0 = ctx.Input(kFirstInputIndex);
auto data_type = static_cast<DataType>(input_0->GetDataType());
// Compute by data_type
switch (data_type) {
case DT_FLOAT:
return AdaptiveAvgPool2dOutTemplate<float>(ctx);
case DT_FLOAT16:
return AdaptiveAvgPool2dOutTemplate<Eigen::half>(ctx);
default:
KERNEL_LOG_ERROR("AdaptiveAvgPool2d kernel data type [%s] not support.", DTypeStr(data_type).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
return KERNEL_STATUS_OK;
}
REGISTER_CPU_KERNEL(kAdaptiveAvgPool2d, AdaptiveAvgPool2d);
} // namespace aicpu
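The SWITCH_PARALLEL macro above picks between a plain serial loop and the parallel-for helper based on the element count. The snippet below is a minimal standalone sketch of that dispatch pattern only; RunSharded, kSmallWorkload and the parallel_for callback are illustrative stand-ins, not the aicpu API.

#include <cstdint>
#include <functional>

constexpr int64_t kSmallWorkload = 4 * 1024;  // mirrors kParallelDataNums above

// Stand-in for CpuKernelUtils::ParallelFor; any thread-pool helper with this shape would do.
using ParallelFor = std::function<void(int64_t total, int64_t per_unit,
                                       const std::function<void(int64_t, int64_t)> &)>;

void RunSharded(int64_t end_num, int64_t num, const std::function<void(int64_t, int64_t)> &shard,
                const ParallelFor &parallel_for) {
  if (num <= kSmallWorkload) {
    // Small inputs: run each unit of work serially on the calling thread.
    for (int64_t i = 0; i < end_num; ++i) {
      shard(i, i + 1);
    }
  } else {
    // Large inputs: hand the same shard function to the thread pool.
    parallel_for(end_num, 1, shard);
  }
}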

View File

@ -0,0 +1,46 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_H_
#define AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_H_
#include <cmath>
#include "cpu_kernel/inc/cpu_ops_kernel.h"
#include "cpu_kernel/inc/cpu_types.h"
namespace aicpu {
class AdaptiveAvgPool2d : public CpuKernel {
public:
AdaptiveAvgPool2d() = default;
~AdaptiveAvgPool2d() = default;
uint32_t Compute(const CpuKernelContext &ctx) override;
};
inline int StartIndex(int offset, int out_size, int in_size) {
if (out_size != 0) {
return static_cast<int>(std::floor(static_cast<float>((offset * in_size)) / out_size));
}
return 0;
}
inline int EndIndex(int offset, int out_size, int in_size) {
if (out_size != 0) {
return static_cast<int>(std::ceil(static_cast<float>(((offset + 1) * in_size)) / out_size));
}
return 0;
}
} // namespace aicpu
#endif // AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_H_
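As a quick check of the index helpers above, the following standalone snippet (plain C++, no aicpu dependencies; StartIdx/EndIdx simply repeat the formulas of StartIndex/EndIndex) prints the input span covered by each output bin. Pooling 5 input rows down to 3 bins yields the windows [0,2), [1,4), [3,5).

#include <cmath>
#include <cstdio>

static int StartIdx(int offset, int out_size, int in_size) {
  return static_cast<int>(std::floor(static_cast<float>(offset * in_size) / out_size));
}
static int EndIdx(int offset, int out_size, int in_size) {
  return static_cast<int>(std::ceil(static_cast<float>((offset + 1) * in_size) / out_size));
}

int main() {
  for (int o = 0; o < 3; ++o) {
    std::printf("bin %d -> [%d, %d)\n", o, StartIdx(o, 3, 5), EndIdx(o, 3, 5));
  }
  return 0;
}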

View File

@ -0,0 +1,213 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cpu_kernel/ms_kernel/adaptive_avg_pool_2d_grad.h"
#include <cmath>
#include "utils/eigen_tensor.h"
#include "utils/kernel_util.h"
#include "cpu_kernel/common/cpu_kernel_utils.h"
namespace {
const char *kAdaptiveAvgPool2dGrad = "AdaptiveAvgPool2dGrad";
template <typename SCALAR_T>
struct AdaptiveCalcArgs {
SCALAR_T *input_data = nullptr;
SCALAR_T *output_data = nullptr;
int64_t in_size_b = 0;
int64_t in_size_d = 0;
int64_t in_size_h = 0;
int64_t in_size_w = 0;
int64_t out_size_h = 0;
int64_t out_size_w = 0;
int64_t out_stride_d = 0;
int64_t in_stride_d = 0;
int64_t out_stride_h = 0;
int64_t in_stride_h = 0;
};
// out_size is guaranteed to be non-zero
inline int StartIndex(int offset, int out_size, int in_size) {
return (int)std::floor((float)(offset * in_size) / out_size);
}
// out_size is guaranteed to be non-zero
inline int EndIndex(int offset, int out_size, int in_size) {
return (int)std::ceil((float)((offset + 1) * in_size) / out_size);
}
} // namespace
namespace aicpu {
template <typename SCALAR_T>
uint32_t AdaptiveAvgPool2dGradOutFrame(const CpuKernelContext &ctx, AdaptiveCalcArgs<SCALAR_T> args) {
uint32_t min_core_num = 1;
int64_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
int64_t total_size = args.in_size_d * args.in_size_b * args.out_size_h * args.out_size_w;
int64_t max_core_num_total = max_core_num;
if (max_core_num_total > total_size) {
max_core_num_total = total_size;
}
auto shard_init = [&](int64_t start, int64_t end) {
for (auto c = start; c < end; c++) {
args.output_data[c] = (SCALAR_T)0;
}
};
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, total_size, total_size / max_core_num_total, shard_init),
"AdaptiveAvgPool2dGrad Compute failed.");
int64_t in_size_db = args.in_size_d * args.in_size_b;
if (max_core_num > in_size_db) {
max_core_num = in_size_db;
}
// treat batch size and channels as one dimension
auto shard_work = [&](int64_t start, int64_t end) {
for (auto c = start; c < end; c++) {
SCALAR_T *output_offset_ptr = args.output_data + c * args.out_stride_d;
SCALAR_T *input_offset_ptr = args.input_data + c * args.in_stride_d;
for (int64_t ih = 0; ih < args.in_size_h; ih++) {
int64_t out_start_h = StartIndex(ih, args.in_size_h, args.out_size_h);
int64_t out_end_h = EndIndex(ih, args.in_size_h, args.out_size_h);
int64_t step_h = out_end_h - out_start_h;
for (int64_t iw = 0; iw < args.in_size_w; iw++) {
int64_t out_start_w = StartIndex(iw, args.in_size_w, args.out_size_w);
int64_t out_end_w = EndIndex(iw, args.in_size_w, args.out_size_w);
int64_t step_w = out_end_w - out_start_w;
if (step_w == 0 || step_h == 0) {
continue;
}
SCALAR_T grad_delta = input_offset_ptr[ih * args.in_stride_h + iw] / step_h / step_w;
int64_t oh = 0, ow = 0, output_size = args.out_stride_d;
for (oh = out_start_h; oh < out_end_h; oh++) {
for (ow = out_start_w; ow < out_end_w; ow++) {
int64_t output_idx = oh * args.out_stride_h + ow;
KERNEL_CHECK_FALSE_VOID((output_idx < output_size),
"Feature map output_idx [%lld] overflow output_size [%lld].", output_idx,
output_size);
output_offset_ptr[output_idx] += grad_delta;
}
}
}
}
}
};
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, in_size_db, in_size_db / max_core_num, shard_work),
"AdaptiveAvgPool2dGrad Compute failed.");
return KERNEL_STATUS_OK;
}
template <typename SCALAR_T>
uint32_t AdaptiveAvgPool2dGradOutCpuTemplate(const CpuKernelContext &ctx) {
Tensor &input = *(ctx.Input(kFirstInputIndex));
auto input_shape_ptr = input.GetTensorShape();
KERNEL_CHECK_NULLPTR(input_shape_ptr, KERNEL_STATUS_PARAM_INVALID, "Get input x shape failed.");
int32_t input_dims = input_shape_ptr->GetDims();
for (int32_t i = 0; i < input_dims; i++) {
KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(i) > 0), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d_grad: expected input to have non-empty spatial dimensions, "
"but input has sizes [%d] with dimension [%d] being empty.",
input_dims, i);
}
KERNEL_CHECK_FALSE(input_dims == 4, KERNEL_STATUS_PARAM_INVALID, "Non-empty [4D] tensor expected for input.");
AdaptiveCalcArgs<SCALAR_T> args;
args.in_size_b = 1;
args.in_size_d = 0;
args.in_size_h = 0;
args.in_size_w = 0;
args.out_size_h = 0;
args.out_size_w = 0;
args.out_stride_d = 1;
args.in_stride_d = 1;
args.out_stride_h = 1;
args.in_stride_h = 1;
std::vector<int64_t> orig_input_size = ctx.GetAttr("orig_input_shape")->GetListInt();
KERNEL_CHECK_FALSE((orig_input_size.size() == 4), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d_grad: internal error, orig_input_size.size() must be [4]");
KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(0) == orig_input_size[0]), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d_grad: internal error, orig_input_size Batch must equal "
"input_size Batch, now orig_input_size Batch is [%lld], input_size Batch is [%lld].",
orig_input_size[0], input_shape_ptr->GetDimSize(0));
KERNEL_CHECK_FALSE((input_shape_ptr->GetDimSize(1) == orig_input_size[1]), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d_grad: internal error, orig_input_size Channel must equal "
"input_size Channel, now orig_input_size Channel is [%lld], input_size Channel is [%lld].",
orig_input_size[1], input_shape_ptr->GetDimSize(1));
int dim_w = 3;
int dim_h = 2;
// sizes
args.in_size_d = input_shape_ptr->GetDimSize(dim_h - 1);
args.in_size_h = input_shape_ptr->GetDimSize(dim_h);
args.in_size_w = input_shape_ptr->GetDimSize(dim_w);
args.out_size_h = orig_input_size[dim_h];
args.out_size_w = orig_input_size[dim_w];
KERNEL_CHECK_FALSE((args.out_size_h != 0 && args.out_size_w != 0), KERNEL_STATUS_PARAM_INVALID,
"Adaptive_avg_pool2d_grad: internal error, output_size H or W can not be zero, "
"now H is [%lld], W is [%lld].",
args.out_size_h, args.out_size_w);
// strides
// The calculation does not overflow because the maximum value is the number of user input elements,
// which is less than the int64_t range.
args.out_stride_d = args.out_size_h * args.out_size_w;
args.out_stride_h = args.out_size_w;
args.in_stride_d = args.in_size_h * args.in_size_w;
args.in_stride_h = args.in_size_w;
args.input_data = static_cast<SCALAR_T *>(input.GetData());
args.output_data = static_cast<SCALAR_T *>(ctx.Output(kFirstOutputIndex)->GetData());
return AdaptiveAvgPool2dGradOutFrame<SCALAR_T>(ctx, args);
}
uint32_t AdaptiveAvgPool2dGrad::Compute(const CpuKernelContext &ctx) {
Tensor *input_0 = ctx.Input(kFirstInputIndex);
KERNEL_CHECK_NULLPTR(input_0, KERNEL_STATUS_PARAM_INVALID, "Get input tensor failed.");
KERNEL_CHECK_NULLPTR(input_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input data failed.");
Tensor *output_0 = ctx.Output(kFirstOutputIndex);
KERNEL_CHECK_NULLPTR(output_0, KERNEL_STATUS_PARAM_INVALID, "Get output tensor failed.");
KERNEL_CHECK_NULLPTR(output_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output data failed.");
AttrValue *attr_orig_input_shape = ctx.GetAttr("orig_input_shape");
KERNEL_CHECK_NULLPTR(attr_orig_input_shape, KERNEL_STATUS_PARAM_INVALID, "[%s] get attr:orig_input_shape failed.",
kAdaptiveAvgPool2dGrad);
std::vector<int64_t> v_orig_input_shape = attr_orig_input_shape->GetListInt();
KERNEL_LOG_INFO("AdaptiveAvgPool2dGrad kernel, input[0]: size is [%llu]; output_0: size is [%llu].",
input_0->GetDataSize(), output_0->GetDataSize());
KERNEL_LOG_INFO("[%s] get attr:orig_input_shape [%s].", kAdaptiveAvgPool2dGrad,
VectorToString(v_orig_input_shape).c_str());
auto data_type = static_cast<DataType>(input_0->GetDataType());
// Compute by data_type
switch (data_type) {
case DT_FLOAT:
return AdaptiveAvgPool2dGradOutCpuTemplate<float>(ctx);
case DT_FLOAT16:
return AdaptiveAvgPool2dGradOutCpuTemplate<Eigen::half>(ctx);
default:
KERNEL_LOG_ERROR("AdaptiveAvgPool2dGrad kernel data type [%s] not support.", DTypeStr(data_type).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
}
REGISTER_CPU_KERNEL(kAdaptiveAvgPool2dGrad, AdaptiveAvgPool2dGrad);
} // namespace aicpu
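A toy 1-D version of the scatter performed by shard_work above: each upstream gradient element is divided by the size of the window it was averaged over and accumulated into every position of that window. Plain arrays and standard C++ only; the values are made up for illustration.

#include <cstdio>
#include <vector>

int main() {
  const int in_size = 3;   // gradient length (= pooled output length)
  const int out_size = 5;  // original input length
  std::vector<float> grad = {3.0f, 6.0f, 9.0f};
  std::vector<float> out(out_size, 0.0f);
  for (int i = 0; i < in_size; ++i) {
    int start = i * out_size / in_size;                      // floor, as in StartIndex
    int end = ((i + 1) * out_size + in_size - 1) / in_size;  // ceil, as in EndIndex
    for (int o = start; o < end; ++o) {
      out[o] += grad[i] / static_cast<float>(end - start);   // grad_delta
    }
  }
  for (float v : out) std::printf("%.2f ", v);  // 1.50 3.50 2.00 6.50 4.50
  std::printf("\n");
  return 0;
}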

View File

@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_GRAD_H_
#define AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_GRAD_H_
#include "cpu_kernel/inc/cpu_ops_kernel.h"
#include "cpu_kernel/inc/cpu_types.h"
namespace aicpu {
class AdaptiveAvgPool2dGrad : public CpuKernel {
public:
AdaptiveAvgPool2dGrad() = default;
~AdaptiveAvgPool2dGrad() = default;
protected:
uint32_t Compute(const CpuKernelContext &ctx) override;
};
} // namespace aicpu
#endif // AICPU_KERNELS_NORMALIZED_ADAPTIVE_AVG_POOL2D_GRAD_H_

View File

@ -0,0 +1,170 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "log_matrix_determinant.h"
#include "Eigen/LU"
#include "cpu_kernel_utils.h"
#include "utils/kernel_util.h"
namespace {
const uint32_t kOutputNum = 2;
const uint32_t kInputNum = 1;
const uint32_t kIndexTwo = 2;
const char *const kLogMatrixDeterminant = "LogMatrixDeterminant";
constexpr int64_t kParallelDataNums = 8 * 1024;
#define LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DTYPE, TYPE, CTX) \
case (DTYPE): { \
uint32_t result = LogMatrixDeterminantCompute<TYPE>(CTX); \
if (result != KERNEL_STATUS_OK) { \
KERNEL_LOG_ERROR("LogMatrixDeterminant kernel compute failed."); \
return result; \
} \
break; \
}
} // namespace
namespace aicpu {
uint32_t LogMatrixDeterminantCpuKernel::Compute(const CpuKernelContext &ctx) {
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "[%s] check input and output failed.",
kLogMatrixDeterminant);
KERNEL_HANDLE_ERROR(LogMatrixDeterminantCheck(ctx), "[%s] check params failed.", kLogMatrixDeterminant);
DataType data_type = ctx.Input(0)->GetDataType();
switch (data_type) {
LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_FLOAT, float, ctx)
LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_DOUBLE, double, ctx)
LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_COMPLEX64, std::complex<float>, ctx)
LOG_MATRIX_DETERMINANT_COMPUTE_CASE(DT_COMPLEX128, std::complex<double>, ctx)
default:
KERNEL_LOG_ERROR("LogMatrixDeterminant kernel data type [%s] not support.", DTypeStr(data_type).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
return KERNEL_STATUS_OK;
}
uint32_t LogMatrixDeterminantCpuKernel::LogMatrixDeterminantCheck(const CpuKernelContext &ctx) {
auto input_0 = ctx.Input(0);
auto output_0 = ctx.Output(0);
auto output_1 = ctx.Output(1);
KERNEL_CHECK_NULLPTR(input_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input x data failed.")
KERNEL_CHECK_NULLPTR(output_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output sign data failed.")
KERNEL_CHECK_NULLPTR(output_1->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output y data failed.")
KERNEL_CHECK_NULLPTR(input_0->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get input x tensor shape failed.")
KERNEL_CHECK_NULLPTR(output_0->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get output sign tensor shape failed.")
KERNEL_CHECK_NULLPTR(output_1->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get output y tensor shape failed.")
std::vector<int64_t> shape_x = input_0->GetTensorShape()->GetDimSizes();
std::vector<int64_t> shape_sign = output_0->GetTensorShape()->GetDimSizes();
std::vector<int64_t> shape_y = output_1->GetTensorShape()->GetDimSizes();
size_t shape_size_x = shape_x.size();
size_t shape_size_sign = shape_sign.size();
size_t shape_size_y = shape_y.size();
KERNEL_CHECK_FALSE((shape_size_x > 1), KERNEL_STATUS_PARAM_INVALID, "Input x must be at least rank 2, got [%zu].",
shape_size_x)
KERNEL_CHECK_FALSE((shape_x[shape_size_x - 1] > 0), KERNEL_STATUS_PARAM_INVALID,
"Input x last dimension must be at least 1.")
KERNEL_CHECK_FALSE((shape_x[shape_size_x - kIndexTwo] == shape_x[shape_size_x - 1]), KERNEL_STATUS_PARAM_INVALID,
"Input x dimensions must be equal, but are [%lld] and [%lld].", shape_x[shape_size_x - kIndexTwo],
shape_x[shape_size_x - 1])
KERNEL_CHECK_FALSE((shape_size_sign == shape_size_x - kIndexTwo), KERNEL_STATUS_PARAM_INVALID,
"Output sign must be rank [%zu], got [%zu].", shape_size_x - kIndexTwo, shape_size_sign)
KERNEL_CHECK_FALSE((shape_size_y == shape_size_x - kIndexTwo), KERNEL_STATUS_PARAM_INVALID,
"Output y must be rank [%zu], got [%zu].", shape_size_x - kIndexTwo, shape_size_y)
for (size_t i = 0; i < shape_size_x - kIndexTwo; i++) {
KERNEL_CHECK_FALSE((shape_sign[i] == shape_x[i]), KERNEL_STATUS_PARAM_INVALID,
"Output sign and Input x dimension [%zu] must be equal, got [%lld] and [%lld].", i,
shape_sign[i], shape_x[i])
KERNEL_CHECK_FALSE((shape_y[i] == shape_x[i]), KERNEL_STATUS_PARAM_INVALID,
"Output y and Input x dimension [%zu] must be equal, got [%lld] and [%lld].", i, shape_y[i],
shape_x[i])
}
return KERNEL_STATUS_OK;
}
template <typename T>
uint32_t LogMatrixDeterminantCpuKernel::LogMatrixDeterminantCompute(const CpuKernelContext &ctx) {
auto input_x = reinterpret_cast<T *>(ctx.Input(0)->GetData());
auto output_sign = reinterpret_cast<T *>(ctx.Output(0)->GetData());
auto output_y = reinterpret_cast<T *>(ctx.Output(1)->GetData());
std::vector<int64_t> shape_x = ctx.Input(0)->GetTensorShape()->GetDimSizes();
size_t shape_size = shape_x.size();
int64_t m = shape_x[shape_size - 1];
int64_t size_mm = m * m;
typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MartixXd;
using RealT = typename Eigen::NumTraits<T>::Real;
if (size_mm > 0) {
int64_t martix_num = ctx.Input(0)->NumElements() / size_mm;
int64_t data_size = ctx.Input(0)->NumElements() * static_cast<int64_t>(sizeof(T));
if (data_size <= kParallelDataNums) {
for (int64_t i = 0; i < martix_num; i++) {
RealT log_abs_det = 0;
T sign = 1;
Eigen::Map<MartixXd> martix_x(input_x + i * m * m, m, m);
if (martix_x.size() > 0) {
Eigen::PartialPivLU<MartixXd> lu(martix_x);
MartixXd LU = lu.matrixLU();
sign = lu.permutationP().determinant();
auto diag = LU.diagonal().array().eval();
auto abs_diag = diag.cwiseAbs().eval();
log_abs_det += abs_diag.log().sum();
sign *= (diag / abs_diag).prod();
}
if (!Eigen::numext::isfinite(log_abs_det)) {
sign = 0;
log_abs_det = log_abs_det > 0 ? -std::log(RealT(0)) : std::log(RealT(0));
}
*(output_sign + i) = sign;
*(output_y + i) = log_abs_det;
}
} else {
uint32_t min_core_num = 1;
int64_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - kResvCpuNum);
if (max_core_num > martix_num) {
max_core_num = martix_num;
}
auto shard_work = [&](size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
RealT log_abs_det = 0;
T sign = 1;
Eigen::Map<MartixXd> martix_x(input_x + i * m * m, m, m);
if (martix_x.size() > 0) {
Eigen::PartialPivLU<MartixXd> lu(martix_x);
MartixXd LU = lu.matrixLU();
sign = static_cast<T>(lu.permutationP().determinant());
auto diag = LU.diagonal().array().eval();
auto abs_diag = diag.cwiseAbs().eval();
log_abs_det += abs_diag.log().sum();
sign *= (diag / abs_diag).prod();
}
if (!Eigen::numext::isfinite(log_abs_det)) {
sign = 0;
log_abs_det = log_abs_det > 0 ? -std::log(RealT(0)) : std::log(RealT(0));
}
*(output_sign + i) = sign;
*(output_y + i) = log_abs_det;
}
};
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, martix_num, martix_num / max_core_num, shard_work),
"LogMatrixDeterminant Compute failed.");
}
}
return KERNEL_STATUS_OK;
}
REGISTER_CPU_KERNEL(kLogMatrixDeterminant, LogMatrixDeterminantCpuKernel);
} // namespace aicpu
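The kernel relies on the standard LU identity: with P*A = L*U, log|det(A)| is the sum of log|U_ii| and sign(det(A)) is det(P) times the product of U_ii/|U_ii|. Below is a standalone sanity check of that identity on a 2x2 matrix with det = 10, using plain Eigen and none of the aicpu plumbing.

#include <cmath>
#include <cstdio>
#include "Eigen/LU"

int main() {
  Eigen::Matrix2d a;
  a << 4.0, 7.0,
       2.0, 6.0;  // det = 4*6 - 7*2 = 10
  Eigen::PartialPivLU<Eigen::Matrix2d> lu(a);
  Eigen::Vector2d diag = lu.matrixLU().diagonal();
  double log_abs_det = diag.array().abs().log().sum();
  double sign = lu.permutationP().determinant() * (diag.array() / diag.array().abs()).prod();
  std::printf("sign=%.0f  log|det|=%.6f  (log(10)=%.6f)\n", sign, log_abs_det, std::log(10.0));
  return 0;
}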

View File

@ -0,0 +1,35 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_KERNELS_NORMALIZED_LOG_MATRIX_DETERMINANT_H
#define AICPU_KERNELS_NORMALIZED_LOG_MATRIX_DETERMINANT_H
#include "cpu_ops_kernel.h"
namespace aicpu {
class LogMatrixDeterminantCpuKernel : public CpuKernel {
public:
LogMatrixDeterminantCpuKernel() = default;
~LogMatrixDeterminantCpuKernel() override = default;
uint32_t Compute(const CpuKernelContext &ctx) override;
private:
uint32_t LogMatrixDeterminantCheck(const CpuKernelContext &ctx);
template <typename T>
uint32_t LogMatrixDeterminantCompute(const CpuKernelContext &ctx);
};
} // namespace aicpu
#endif

View File

@ -0,0 +1,212 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "median.h"
#include "cpu_kernel_utils.h"
#include "utils/eigen_tensor.h"
#include "utils/kernel_util.h"
#include <algorithm>
namespace {
const uint32_t kInputNum = 1;
const uint32_t kOutputNum = 2;
const char *kMedian = "Median";
#define MEDIAN_COMPUTE_CASE(DTYPE, TYPE, CTX) \
case (DTYPE): { \
uint32_t result = MedianCompute<TYPE>(CTX); \
if (result != KERNEL_STATUS_OK) { \
KERNEL_LOG_ERROR("Median kernel compute failed."); \
return result; \
} \
break; \
}
#define GLOBAL_MEDIAN_COMPUTE_CASE(DTYPE, TYPE, CTX) \
case (DTYPE): { \
uint32_t result = GlobalMedianCompute<TYPE>(CTX); \
if (result != KERNEL_STATUS_OK) { \
KERNEL_LOG_ERROR("Median kernel compute failed."); \
return result; \
} \
break; \
}
} // namespace
namespace aicpu {
uint32_t MedianCpuKernel::Compute(const CpuKernelContext &ctx) {
KERNEL_HANDLE_ERROR(MedianCheck(ctx), "Median check params failed.");
auto data_type = ctx.Input(0)->GetDataType();
AttrValue *global_ptr = ctx.GetAttr("global_median");
bool global_median_bool = global_ptr->GetBool();
if (global_median_bool == false) {
switch (data_type) {
MEDIAN_COMPUTE_CASE(DT_INT16, int16_t, ctx)
MEDIAN_COMPUTE_CASE(DT_INT32, int32_t, ctx)
MEDIAN_COMPUTE_CASE(DT_INT64, int64_t, ctx)
MEDIAN_COMPUTE_CASE(DT_FLOAT, float, ctx)
MEDIAN_COMPUTE_CASE(DT_DOUBLE, double, ctx)
default:
KERNEL_LOG_ERROR("Median kernel data type [%s] not support.", DTypeStr(data_type).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
} else {
switch (data_type) {
GLOBAL_MEDIAN_COMPUTE_CASE(DT_INT16, int16_t, ctx)
GLOBAL_MEDIAN_COMPUTE_CASE(DT_INT32, int32_t, ctx)
GLOBAL_MEDIAN_COMPUTE_CASE(DT_INT64, int64_t, ctx)
GLOBAL_MEDIAN_COMPUTE_CASE(DT_FLOAT, float, ctx)
GLOBAL_MEDIAN_COMPUTE_CASE(DT_DOUBLE, double, ctx)
default:
KERNEL_LOG_ERROR("Median kernel data type [%s] not support.", DTypeStr(data_type).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
}
return KERNEL_STATUS_OK;
}
uint32_t MedianCpuKernel::MedianCheck(const CpuKernelContext &ctx) {
auto global_median = ctx.GetAttr("global_median");
KERNEL_CHECK_NULLPTR(global_median, KERNEL_STATUS_PARAM_INVALID, "Get attr global_median failed.");
bool global_median_value = global_median->GetBool();
if (global_median_value == false) {
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "Median check input and output number failed.");
auto input_shape_ptr = ctx.Input(0)->GetTensorShape();
int64_t input_shape_dims = input_shape_ptr->GetDims();
int64_t dim_num = 0;
AttrValue *dim_ptr = ctx.GetAttr("axis");
if (dim_ptr != nullptr) dim_num = dim_ptr->GetInt();
if (input_shape_dims != 0) {
KERNEL_CHECK_FALSE((dim_num >= (0 - input_shape_dims) && dim_num <= (input_shape_dims - 1)),
KERNEL_STATUS_PARAM_INVALID,
"IndexError: Dimension out of range "
"(expected to be in range of [[%lld], [%lld]], but got [%lld])",
(0 - input_shape_dims), (input_shape_dims - 1), dim_num);
} else {
KERNEL_CHECK_FALSE((dim_num >= -1 && dim_num <= 0), KERNEL_STATUS_PARAM_INVALID,
"IndexError: Dimension out of range "
"(expected to be in range of [[%lld], [%lld]], but got [%lld])",
-1, 0, dim_num);
}
} else {
Tensor *input_0 = ctx.Input(0);
KERNEL_CHECK_NULLPTR(input_0, KERNEL_STATUS_PARAM_INVALID, "Get input failed.");
KERNEL_CHECK_NULLPTR(input_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input data failed.");
Tensor *output_0 = ctx.Output(0);
KERNEL_CHECK_NULLPTR(output_0, KERNEL_STATUS_PARAM_INVALID, "Get output_0 failed.");
KERNEL_CHECK_NULLPTR(output_0->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output data 0 failed.");
}
if (global_median_value == false) {
KERNEL_LOG_DEBUG(
"MedianCpuKernel[%s], input0: size[%llu];"
"output0: size[%llu], output1: size[%llu].",
ctx.GetOpType().c_str(), ctx.Input(0)->GetDataSize(), ctx.Output(0)->GetDataSize(), ctx.Output(1)->GetDataSize());
} else {
KERNEL_LOG_DEBUG(
"MedianCpuKernel[%s], input0: size[%llu];"
"output0: size[%llu].",
ctx.GetOpType().c_str(), ctx.Input(0)->GetDataSize(), ctx.Output(0)->GetDataSize());
}
return KERNEL_STATUS_OK;
}
template <typename T>
uint32_t MedianCpuKernel::GlobalMedianCompute(const CpuKernelContext &ctx) {
auto input_x0 = reinterpret_cast<T *>(ctx.Input(0)->GetData());
auto output_y0 = reinterpret_cast<T *>(ctx.Output(0)->GetData());
int64_t data_num = ctx.Input(0)->GetTensorShape()->NumElements();
const int64_t half = 2;
std::nth_element(input_x0, input_x0 + static_cast<int64_t>((data_num - 1) / half), input_x0 + data_num);
*output_y0 = *(input_x0 + static_cast<int64_t>((data_num - 1) / half));
return KERNEL_STATUS_OK;
}
template <typename T>
uint32_t MedianCpuKernel::MedianCompute(const CpuKernelContext &ctx) {
auto input_x0 = reinterpret_cast<T *>(ctx.Input(0)->GetData());
auto output_y0 = reinterpret_cast<T *>(ctx.Output(0)->GetData());
auto output_y1 = reinterpret_cast<int64_t *>(ctx.Output(1)->GetData());
auto input_shape_ptr = ctx.Input(0)->GetTensorShape();
int64_t input_shape_dims = input_shape_ptr->GetDims();
if (input_shape_dims == 0) {
*output_y0 = *input_x0;
*output_y1 = 0;
return KERNEL_STATUS_OK;
}
int64_t dim_num = 0;
AttrValue *dim_ptr = ctx.GetAttr("axis");
if (dim_ptr != nullptr) {
dim_num = dim_ptr->GetInt();
}
if (dim_num < 0) {
dim_num += input_shape_dims;
}
auto input_shape_0 = ctx.Input(0)->GetTensorShape()->GetDimSizes();
int64_t dim_data_num = input_shape_0[dim_num];
T *temp_median_vec = new T[dim_data_num];
int64_t *temp_median_index_vec = new int64_t[dim_data_num];
int64_t group = 1;
int64_t jump = 1;
int64_t median_pos = static_cast<int64_t>((dim_data_num - 1) / 2);
if (dim_num != 0) {
for (int64_t i = 0; i < dim_num; i++) {
group *= input_shape_0[i];
}
}
if (dim_num != input_shape_dims - 1) {
for (int64_t i = dim_num + 1; i < input_shape_dims; i++) {
jump *= input_shape_0[i];
}
}
T *start = input_x0;
for (int64_t i = 0; i < group; i++) {
for (int64_t j = 0; j < jump; j++) {
for (int64_t k = 0; k < dim_data_num; k++) {
auto num_index = start + k * jump + j;
temp_median_index_vec[k] = k;
temp_median_vec[k] = *num_index;
}
std::nth_element(temp_median_index_vec, temp_median_index_vec + median_pos, temp_median_index_vec + dim_data_num,
[&temp_median_vec, dim_data_num](int64_t pos1, int64_t pos2) {
return (*(temp_median_vec + pos1) < *(temp_median_vec + pos2)) ||
(pos1 >= 0 && pos1 < dim_data_num &&
*(temp_median_vec + pos1) == *(temp_median_vec + pos2) && pos1 < pos2);
});
std::nth_element(temp_median_vec, temp_median_vec + median_pos, temp_median_vec + dim_data_num);
*(output_y0 + i * jump + j) = *(temp_median_vec + median_pos);
*(output_y1 + i * jump + j) = *(temp_median_index_vec + median_pos);
}
if (i != group - 1) {
start += jump * dim_data_num;
}
}
delete[] temp_median_vec;
delete[] temp_median_index_vec;
return KERNEL_STATUS_OK;
}
REGISTER_CPU_KERNEL(kMedian, MedianCpuKernel);
} // namespace aicpu
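MedianCompute pairs two std::nth_element passes per slice: one ranks the indices, with equal values ordered by index so the reported index is deterministic, and the other ranks the values themselves; both pick element (n - 1) / 2. A minimal standalone version of that pairing, standard library only, with made-up data:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> v = {7.0f, 3.0f, 3.0f, 9.0f, 1.0f};
  std::vector<int64_t> idx = {0, 1, 2, 3, 4};
  const int64_t pos = (static_cast<int64_t>(v.size()) - 1) / 2;  // lower median
  // Rank indices by (value, index) so ties keep a deterministic order.
  std::nth_element(idx.begin(), idx.begin() + pos, idx.end(),
                   [&v](int64_t a, int64_t b) { return v[a] < v[b] || (v[a] == v[b] && a < b); });
  std::vector<float> vals = v;
  std::nth_element(vals.begin(), vals.begin() + pos, vals.end());
  // Prints: median=3.0 index=2
  std::printf("median=%.1f index=%lld\n", vals[pos], static_cast<long long>(idx[pos]));
  return 0;
}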

View File

@ -0,0 +1,38 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_KERNELS_NORMALIZED_MEDIAN_H_
#define AICPU_KERNELS_NORMALIZED_MEDIAN_H_
#include "cpu_ops_kernel.h"
#include "utils/bcast.h"
namespace aicpu {
class MedianCpuKernel : public CpuKernel {
public:
MedianCpuKernel() = default;
~MedianCpuKernel() override = default;
uint32_t Compute(const CpuKernelContext &ctx) override;
private:
uint32_t MedianCheck(const CpuKernelContext &ctx);
template <typename T>
uint32_t GlobalMedianCompute(const CpuKernelContext &ctx);
template <typename T>
uint32_t MedianCompute(const CpuKernelContext &ctx);
};
} // namespace aicpu
#endif

View File

@ -0,0 +1,280 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mediangrad.h"
#include "cpu_kernel_utils.h"
#include "utils/eigen_tensor.h"
#include "utils/kernel_util.h"
namespace {
const char *kMedianGrad = "MedianGrad";
const uint32_t kOutputNum = 1;
const uint32_t kInputNum = 4;
const uint32_t kGlobalOutputNum = 1;
const uint32_t kGlobalInputNum = 3;
// when the input data size exceeds kParallelDataNum, use the parallel path
const int64_t kParallelDataNum = 2 * 1024;
const int64_t kParallelDataNumMid = 16 * 1024;
#define MEDIANGRAD_COMPUTE_CASE(DTYPE, TYPE, TYPE2, CTX) \
case (DTYPE): { \
uint32_t result = MedianGradCompute<TYPE, TYPE2>(CTX); \
if (result != KERNEL_STATUS_OK) { \
KERNEL_LOG_ERROR("MedianGrad kernel compute failed."); \
return result; \
} \
break; \
}
#define GLOBALMEDIANGRAD_COMPUTE_CASE(DTYPE, TYPE, TYPE2, CTX) \
case (DTYPE): { \
uint32_t result = GlobalMedianGradCompute<TYPE, TYPE2>(CTX); \
if (result != KERNEL_STATUS_OK) { \
KERNEL_LOG_ERROR("GlobalMedianGrad kernel compute failed."); \
return result; \
} \
break; \
}
} // namespace
namespace aicpu {
uint32_t MedianGradCpuKernel::Compute(const CpuKernelContext &ctx) {
// check params
KERNEL_HANDLE_ERROR(MedianGradParamCheck(ctx), "MedianGrad check params failed.");
auto data_type_x = ctx.Input(1)->GetDataType();
AttrValue *global_median_ptr = ctx.GetAttr("global_median");
bool global_median = global_median_ptr->GetBool();
if (global_median == false) {
switch (data_type_x) {
MEDIANGRAD_COMPUTE_CASE(DT_INT16, int16_t, float, ctx)
MEDIANGRAD_COMPUTE_CASE(DT_INT32, int32_t, float, ctx)
MEDIANGRAD_COMPUTE_CASE(DT_INT64, int64_t, float, ctx)
MEDIANGRAD_COMPUTE_CASE(DT_FLOAT, float, float, ctx)
MEDIANGRAD_COMPUTE_CASE(DT_DOUBLE, double, double, ctx)
default:
KERNEL_LOG_ERROR("MedianGrad kernel data type [%s] of input x not support.", DTypeStr(data_type_x).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
} else {
switch (data_type_x) {
GLOBALMEDIANGRAD_COMPUTE_CASE(DT_INT16, int16_t, float, ctx)
GLOBALMEDIANGRAD_COMPUTE_CASE(DT_INT32, int32_t, float, ctx)
GLOBALMEDIANGRAD_COMPUTE_CASE(DT_INT64, int64_t, float, ctx)
GLOBALMEDIANGRAD_COMPUTE_CASE(DT_FLOAT, float, float, ctx)
GLOBALMEDIANGRAD_COMPUTE_CASE(DT_DOUBLE, double, double, ctx)
default:
KERNEL_LOG_ERROR("GlobalMedianGrad kernel data type [%s] of input x not support.",
DTypeStr(data_type_x).c_str());
return KERNEL_STATUS_PARAM_INVALID;
}
}
return KERNEL_STATUS_OK;
}
uint32_t MedianGradCpuKernel::MedianGradParamCheck(const CpuKernelContext &ctx) {
auto global_median_ptr = ctx.GetAttr("global_median");
KERNEL_CHECK_NULLPTR(global_median_ptr, KERNEL_STATUS_PARAM_INVALID, "Get attr global_median failed.");
bool global_median = global_median_ptr->GetBool();
if (global_median == false) {
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "MedianGrad check input and output number failed.");
} else {
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kGlobalInputNum, kGlobalOutputNum),
"GlobalMedianGrad check input and output number failed.");
}
Tensor *input_y_grad = ctx.Input(0);
Tensor *input_x = ctx.Input(1);
Tensor *input_y = ctx.Input(2);
Tensor *output_x_grad = ctx.Output(0);
int64_t y_grad_num = ctx.Input(0)->GetTensorShape()->NumElements();
int64_t y_num = ctx.Input(2)->GetTensorShape()->NumElements();
KERNEL_CHECK_FALSE((y_num == y_grad_num), KERNEL_STATUS_PARAM_INVALID,
"The data num of input y_grad [%llu] is different from y [%llu].", y_grad_num, y_num)
auto data_type_x = ctx.Input(1)->GetDataType();
auto data_type_y_grad = ctx.Input(0)->GetDataType();
KERNEL_CHECK_FALSE((data_type_y_grad == data_type_x), KERNEL_STATUS_PARAM_INVALID,
"The data type of input y_grad [%s] is different from x [%s].", DTypeStr(data_type_y_grad).c_str(),
DTypeStr(data_type_x).c_str())
if (global_median == false) {
Tensor *input_indices = ctx.Input(3);
KERNEL_LOG_DEBUG(
"MedianGradCpuKernel[%s], input_y_grad: size[%llu],"
"input_x: size[%llu], input_y: size[%llu],"
"input_indices: size[%llu], output_x_grad: size[%llu].",
ctx.GetOpType().c_str(), input_y_grad->GetDataSize(), input_x->GetDataSize(), input_y->GetDataSize(),
input_indices->GetDataSize(), output_x_grad->GetDataSize());
} else {
KERNEL_LOG_DEBUG(
"MedianGradCpuKernel[%s], input_y_grad: size[%llu],"
"input_x: size[%llu], input_y: size[%llu],"
"output_x_grad: size[%llu].",
ctx.GetOpType().c_str(), input_y_grad->GetDataSize(), input_x->GetDataSize(), input_y->GetDataSize(),
output_x_grad->GetDataSize());
}
return KERNEL_STATUS_OK;
}
template <typename T1, typename T2>
uint32_t MedianGradCpuKernel::GlobalMedianGradCompute(const CpuKernelContext &ctx) {
auto y_grad = reinterpret_cast<T1 *>(ctx.Input(0)->GetData());
auto x = reinterpret_cast<T1 *>(ctx.Input(1)->GetData());
auto y = reinterpret_cast<T1 *>(ctx.Input(2)->GetData());
auto x_grad = reinterpret_cast<T2 *>(ctx.Output(0)->GetData());
int64_t output_data_num = ctx.Output(0)->NumElements();
int64_t input_data_num = ctx.Input(1)->NumElements();
T2 count_repeat = 0;
for (int64_t i = 0; i < input_data_num; i++) {
count_repeat += (*(x + i) == *y) ? 1 : 0;
}
if (output_data_num >= kParallelDataNum) {
uint32_t min_core_num = 1;
uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
if (output_data_num <= kParallelDataNumMid) {
max_core_num = std::min(max_core_num, 4U); // up to 4 cpu cores
}
if (max_core_num > output_data_num) {
max_core_num = output_data_num;
}
auto sharder_mediangrad = [&](int64_t start, int64_t end) {
for (int64_t i = start; i < end; i++) {
*(x_grad + i) = (*(x + i) == *y) ? (*y_grad / count_repeat) : 0;
}
};
KERNEL_HANDLE_ERROR(
CpuKernelUtils::ParallelFor(ctx, output_data_num, output_data_num / max_core_num, sharder_mediangrad),
"MedianGrad Compute failed.");
} else {
for (int64_t i = 0; i < output_data_num; i++) {
*(x_grad + i) = (*(x + i) == *y) ? (*y_grad / count_repeat) : 0;
}
}
return KERNEL_STATUS_OK;
}
template <typename T1, typename T2>
uint32_t MedianGradCpuKernel::MedianGradCompute(const CpuKernelContext &ctx) {
auto y_grad = reinterpret_cast<T1 *>(ctx.Input(0)->GetData());
auto indices = reinterpret_cast<int64_t *>(ctx.Input(3)->GetData());
auto x_grad = reinterpret_cast<T2 *>(ctx.Output(0)->GetData());
int64_t output_data_num = ctx.Output(0)->NumElements();
int64_t need_calculate_num = ctx.Input(0)->NumElements();
for (int64_t i = 0; i < output_data_num; i++) {
*(x_grad + i) = 0;
}
AttrValue *axis_ptr = ctx.GetAttr("axis");
int64_t axis = axis_ptr == nullptr ? 0 : axis_ptr->GetInt();
std::vector<int64_t> shape_x = ctx.Input(1)->GetTensorShape()->GetDimSizes();
std::vector<int64_t> shape_y = ctx.Input(2)->GetTensorShape()->GetDimSizes();
std::vector<int64_t> shape_keepdim;
int64_t dim_num_x = ctx.Input(1)->GetTensorShape()->GetDims();
axis = axis >= 0 ? axis : axis + dim_num_x;
for (int64_t i = 0; i < dim_num_x; i++) {
if (i == axis) {
shape_keepdim.push_back(1);
} else {
shape_keepdim.push_back(shape_x[i]);
}
}
std::vector<int64_t> element_num_each_dim_x;
std::vector<int64_t> element_num_each_dim_y;
int64_t element_num_y = 1;
int64_t element_num_x = 1;
for (int64_t i = shape_keepdim.size() - 1; i >= 0; i--) {
element_num_each_dim_x.insert(element_num_each_dim_x.begin(), element_num_x);
element_num_x *= shape_x[i];
element_num_each_dim_y.insert(element_num_each_dim_y.begin(), element_num_y);
element_num_y *= shape_keepdim[i];
}
if (need_calculate_num >= kParallelDataNum) {
uint32_t min_core_num = 1;
uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
if (need_calculate_num <= kParallelDataNumMid) {
max_core_num = std::min(max_core_num, 4U); // up to 4 cpu cores
}
if (max_core_num > need_calculate_num) {
max_core_num = need_calculate_num;
}
auto sharder_mediangrad = [&](int64_t start, int64_t end) {
std::vector<int64_t> dim_vec;
for (int64_t i = 0; i < dim_num_x; i++) {
dim_vec.push_back(0);
}
for (int64_t nth_element = start; nth_element < end; nth_element++) {
int64_t elements_remain = nth_element;
for (int64_t i = 0; i < dim_num_x; i++) {
dim_vec[i] = elements_remain / element_num_each_dim_y[i];
elements_remain %= element_num_each_dim_y[i];
}
int64_t update_element_pos = 0;
for (int64_t i = 0; i < dim_num_x; i++) {
if (i == axis) {
update_element_pos += *(indices + nth_element) * element_num_each_dim_x[i];
} else {
update_element_pos += dim_vec[i] * element_num_each_dim_x[i];
}
}
*(x_grad + update_element_pos) = *(y_grad + nth_element);
}
};
KERNEL_HANDLE_ERROR(
CpuKernelUtils::ParallelFor(ctx, need_calculate_num, need_calculate_num / max_core_num, sharder_mediangrad),
"MedianGrad Compute failed.");
} else {
std::vector<int64_t> dim_vec;
for (int64_t i = 0; i < dim_num_x; i++) {
dim_vec.push_back(0);
}
for (int64_t nth_element = 0; nth_element < need_calculate_num; nth_element++) {
int64_t elements_remain = nth_element;
for (int64_t i = 0; i < dim_num_x; i++) {
dim_vec[i] = elements_remain / element_num_each_dim_y[i];
elements_remain %= element_num_each_dim_y[i];
}
int64_t update_element_pos = 0;
for (int64_t i = 0; i < dim_num_x; i++) {
if (i == axis) {
update_element_pos += *(indices + nth_element) * element_num_each_dim_x[i];
} else {
update_element_pos += dim_vec[i] * element_num_each_dim_x[i];
}
}
*(x_grad + update_element_pos) = *(y_grad + nth_element);
}
}
return KERNEL_STATUS_OK;
}
REGISTER_CPU_KERNEL(kMedianGrad, MedianGradCpuKernel);
} // namespace aicpu
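The scatter in MedianGradCompute decomposes each position of the reduced (keepdim) gradient using the keepdim strides, then rebuilds a flat offset with the input strides, substituting the saved median index along axis. The following standalone check walks a 2x3 input reduced over axis 1 with hypothetical median indices; it uses only standard C++ and mirrors the stride arithmetic above.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int64_t> shape_x = {2, 3};   // input shape
  const int64_t axis = 1;
  const std::vector<int64_t> indices = {2, 0};   // median index per row (from the forward pass)
  const std::vector<int64_t> stride_x = {3, 1};  // row-major strides of the input
  const std::vector<int64_t> stride_y = {1, 1};  // row-major strides of the keepdim output {2, 1}
  for (int64_t nth = 0; nth < 2; ++nth) {        // one gradient element per row
    int64_t remain = nth;
    int64_t pos = 0;
    for (size_t i = 0; i < shape_x.size(); ++i) {
      int64_t coord = remain / stride_y[i];
      remain %= stride_y[i];
      pos += (static_cast<int64_t>(i) == axis ? indices[nth] : coord) * stride_x[i];
    }
    // Prints: y_grad[0] -> x_grad[2], then y_grad[1] -> x_grad[3]
    std::printf("y_grad[%lld] -> x_grad[%lld]\n", static_cast<long long>(nth),
                static_cast<long long>(pos));
  }
  return 0;
}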

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_KERNELS_NORMALIZED_MEDIANGRAD_H_
#define AICPU_KERNELS_NORMALIZED_MEDIANGRAD_H_
#include "cpu_ops_kernel.h"
#include "utils/bcast.h"
namespace aicpu {
class MedianGradCpuKernel : public CpuKernel {
public:
MedianGradCpuKernel() = default;
~MedianGradCpuKernel() override = default;
protected:
uint32_t Compute(const CpuKernelContext &ctx) override;
private:
uint32_t MedianGradParamCheck(const CpuKernelContext &ctx);
template <typename T1, typename T2>
uint32_t MedianGradCompute(const CpuKernelContext &ctx);
template <typename T1, typename T2>
uint32_t GlobalMedianGradCompute(const CpuKernelContext &ctx);
};
} // namespace aicpu
#endif

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "eigen_tensor.h"
namespace aicpu {
const Tensor *EigenTensor::GetTensor() const { return tensor_; }
} // namespace aicpu

View File

@ -0,0 +1,170 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_EIGENTENSOR_H
#define AICPU_EIGENTENSOR_H
#include "cpu_tensor.h"
#include "kernel_log.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace aicpu {
// Helper to define Tensor types given that the scalar is of type T.
template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
struct TTypes {
// Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned> Tensor;
typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstTensor;
// Unaligned Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType> > UnalignedTensor;
typedef Eigen::TensorMap<Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType> > UnalignedConstTensor;
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, int>, Eigen::Aligned> Tensor32Bit;
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
Scalar;
typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstScalar;
// Unaligned Scalar tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType> > UnalignedScalar;
typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType> >
UnalignedConstScalar;
// Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Flat;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> Vec;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstVec;
// Unaligned Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType> > UnalignedFlat;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType> > UnalignedConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType> > UnalignedVec;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType> > UnalignedConstVec;
// Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned> Matrix;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned> ConstMatrix;
// Unaligned Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType> > UnalignedMatrix;
typedef Eigen::TensorMap<Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType> > UnalignedConstMatrix;
};
} // namespace aicpu
namespace aicpu {
class EigenTensor {
public:
EigenTensor() = delete;
EigenTensor(Tensor *tensor, void *data) : tensor_(tensor), tensor_data_(data) {}
~EigenTensor() = default;
/*
* Get tensor
* @return succ: tensor, error : nullptr
*/
const Tensor *GetTensor() const;
/*
* Eigen vec
* @return Eigen vec
*/
template <typename T>
typename TTypes<T>::Vec vec() {
return tensor<T, 1>();
}
/*
* Eigen matrix
* @return Eigen matrix
*/
template <typename T>
typename TTypes<T>::Matrix matrix() {
return tensor<T, 2>();
}
/*
* Eigen ConstMatrix
* @return Eigen ConstMatrix
*/
template <typename T>
typename TTypes<T>::ConstMatrix matrix() const {
return tensor<T, 2>();
}
/*
* Eigen tensor
* @return Eigen tensor
*/
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::Tensor tensor() {
return typename TTypes<T, NDIMS>::Tensor(reinterpret_cast<T *>(tensor_data_), AsEigenDSizes<NDIMS>());
}
/*
* Eigen ConstTensor
* @return Eigen ConstTensor
*/
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::ConstTensor tensor() const {
return typename TTypes<T, NDIMS>::ConstTensor(reinterpret_cast<const T *>(tensor_data_), AsEigenDSizes<NDIMS>());
}
/*
* Eigen Flat
* @return Eigen Flat
*/
template <typename T>
typename TTypes<T>::Flat flat() {
return typename TTypes<T>::Flat(reinterpret_cast<T *>(tensor_data_), {tensor_->GetTensorShape()->NumElements()});
}
/*
* Convert the tensor shape to Eigen::DSizes; when NDIMS exceeds the tensor rank,
* the remaining sizes are padded with 1.
* @return Eigen::DSizes with trailing sizes padded with 1
*/
template <int NDIMS, typename IndexType>
Eigen::DSizes<IndexType, NDIMS> AsEigenDSizesWithPadding() const {
Eigen::DSizes<IndexType, NDIMS> dsizes;
for (int d = 0; d < tensor_->GetTensorShape()->GetDims(); d++) {
dsizes[d] = static_cast<IndexType>(tensor_->GetTensorShape()->GetDimSize(d));
}
for (int d = tensor_->GetTensorShape()->GetDims(); d < NDIMS; d++) {
dsizes[d] = 1;
}
return dsizes;
}
/*
* Build an Eigen::DSizes from this tensor's shape.
* @return Eigen::DSizes with trailing sizes padded with 1
*/
template <int NDIMS, typename IndexType = Eigen::DenseIndex>
Eigen::DSizes<IndexType, NDIMS> AsEigenDSizes() const {
return AsEigenDSizesWithPadding<NDIMS, IndexType>();
}
private:
Tensor *tensor_;
void *tensor_data_;
};
} // namespace aicpu
#endif // AICPU_EIGENTENSOR_H
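The TTypes aliases above are thin wrappers over Eigen::TensorMap: a rank-2 map views a raw buffer as a row-major matrix with the given dimensions. A minimal standalone illustration, using the unaligned variant so any buffer works:

#include <cstdio>
#include "unsupported/Eigen/CXX11/Tensor"

int main() {
  float data[6] = {0, 1, 2, 3, 4, 5};
  // Equivalent to TTypes<float, 2>::UnalignedTensor: a 2x3 row-major view over `data`.
  Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor>> m(data, 2, 3);
  std::printf("%g\n", static_cast<double>(m(1, 2)));  // element at row 1, col 2 -> 5
  return 0;
}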

View File

@ -48,7 +48,14 @@ const AnfNodePtr AICpuLibSelectPass::Process(const FuncGraphPtr &graph, const An
kSliceGradOpName,
kRandomShuffleOpName,
kRangeOpName};
static const std::set<std::string> kMigrateAicpuKernelOps = {kACosOpName};
static const std::set<std::string> kMigrateAicpuKernelOps = {
mindspore::kACosOpName,
mindspore::kLogMatrixDeterminantOpName,
mindspore::kAdaptiveAvgPool2dOpName,
mindspore::kAdaptiveAvgPool2dGradOpName,
mindspore::kMedianOpName,
mindspore::kMedianGradOpName,
};
static const std::string kEnvOpSoNames = "mindspore_aicpu_kernels";
static const std::string kCpuKernelSoName = "mindspore_cpu_kernels";
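The intent of the sets above appears to be library selection: ops listed in kMigrateAicpuKernelOps are dispatched to the migrated mindspore_aicpu_kernels library, while everything else keeps the default mindspore_cpu_kernels library. A simplified sketch of that decision; SelectKernelSoName is an illustrative helper, not the pass's actual API.

#include <set>
#include <string>

std::string SelectKernelSoName(const std::string &op_name) {
  static const std::set<std::string> kMigrated = {"ACos", "LogMatrixDeterminant", "AdaptiveAvgPool2d",
                                                  "AdaptiveAvgPool2dGrad", "Median", "MedianGrad"};
  return kMigrated.count(op_name) > 0 ? "mindspore_aicpu_kernels" : "mindspore_cpu_kernels";
}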