forked from mindspore-Ecosystem/mindspore
!48406 fix api migration issues and some doc issues
Merge pull request !48406 from 李林杰/0204_fix_aicpu_migration_issues_master
Commit: 9171b5d329
@ -348,3 +348,5 @@ mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel
 mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_dense_cwise_utils.cc:aicpu::SparseDenseCwiseOpKernel<Op>::SparseDenseCwiseOpSpecialComputeComplex
 mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_dense_cwise_utils.cc:aicpu::SparseDenseCwiseOpKernel<Op>::SparseDenseCwiseOpBcastComputeComplex
 mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/resize_bicubic_grad.cc:aicpu::ResizeBicubicGrad
+mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/segment_max.cc:aicpu::SegmentMaxCpuKernel::SegmentMaxCompute
+mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/extract_glimpse.cc:aicpu::ExtractGlimpseCpuKernel::Compute
@ -312,6 +312,7 @@ constexpr auto kExpandDOpName = "ExpandD";
 constexpr auto kExpandDimsOpName = "ExpandDims";
 constexpr auto kExpOpName = "Exp";
 constexpr auto kExtractGlimpse = "ExtractGlimpse";
+constexpr auto kExtractGlimpseOpName = "ExtractGlimpse";
 constexpr auto kExtractImagePatchesOpName = "ExtractImagePatches";
 constexpr auto kEyeOpName = "Eye";
 constexpr auto kFastGeLUOpName = "FastGeLU";
@ -468,6 +469,7 @@ constexpr auto kLSTMGradOpName = "LSTMGrad";
 constexpr auto kLSTMInputGradOpName = "LSTMInputGrad";
 constexpr auto kLSTMOpName = "LSTM";
 constexpr auto kLstsqOpName = "Lstsq";
+constexpr auto kLuSolveOpName = "LuSolve";
 constexpr auto kLuUnpackOpName = "LuUnpack";
 constexpr auto kLuUnpackGradOpName = "LuUnpackGrad";
 constexpr auto kMaskedFillOpName = "MaskedFill";
@ -19,6 +19,7 @@
 
 namespace ge {
 constexpr int64_t kMaxDimSize = 32;
+constexpr int64_t DIM_SIZE2 = 2;
 
 #pragma pack(push, 1)
 struct RuntimeTensorDesc {
@ -1,154 +0,0 @@
-/**
- * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "cache_swap_table.h"
-#include <securec.h>
-#include <map>
-#include "cpu_types.h"
-#include "kernel_log.h"
-#include "status.h"
-#include "utils/sparse_tensor.h"
-#include "utils/kernel_util.h"
-
-namespace {
-const char *const kCacheSwapTable = "CacheSwapTable";
-}
-
-namespace aicpu {
-template <typename T>
-uint32_t CacheSwapTableTask(std::vector<Tensor *> &inputs, std::vector<Tensor *> &outputs, int64_t batch_size,
-                            int64_t output_size, int64_t one_line_col, int type_size) {
-  if (inputs.size() == 0 || outputs.size() == 0) {
-    KERNEL_LOG_ERROR("CacheSwapTable input or output is empty.");
-    return KERNEL_STATUS_PARAM_INVALID;
-  }
-
-  char *cache_table = reinterpret_cast<char *>(inputs[0]->GetData());
-  T *swap_cache_idx = reinterpret_cast<T *>(inputs[1]->GetData());
-  uint64_t swap_cache_idx_size = inputs[1]->GetDataSize();
-  char *miss_value = reinterpret_cast<char *>(inputs[2]->GetData());
-
-  char *old_value = reinterpret_cast<char *>(outputs[0]->GetData());
-
-  errno_t ret = memset_s(old_value, static_cast<size_t>(output_size * type_size), 0x00,
-                         static_cast<size_t>(output_size * type_size));
-  if (ret != EOK) {
-    KERNEL_LOG_ERROR("Memset failed, result[%d]", ret);
-    return KERNEL_STATUS_INNER_ERROR;
-  }
-
-  uint64_t single_copy_size = static_cast<uint64_t>(type_size * one_line_col);
-
-  if (swap_cache_idx_size < static_cast<uint64_t>(batch_size)) {
-    KERNEL_LOG_ERROR(
-      "The value of swap_cache_idx_size:[%llu] must be less than "
-      "batch_size:[%lld]",
-      swap_cache_idx_size, batch_size);
-    return KERNEL_STATUS_INNER_ERROR;
-  }
-
-  uint64_t old_value_size = outputs[0]->GetDataSize();
-  uint64_t cache_table_size = inputs[0]->GetDataSize();
-  for (int64_t i = 0; i < batch_size; ++i) {
-    if (swap_cache_idx[i] < 0) {
-      continue;
-    }
-    ret = memcpy_s(old_value + i * single_copy_size, old_value_size, cache_table + swap_cache_idx[i] * single_copy_size,
-                   single_copy_size);
-    old_value_size -= single_copy_size;
-    if (ret != EOK) {
-      KERNEL_LOG_ERROR("CacheSwapTable memcpy failed, result [%d].", ret);
-      return KERNEL_STATUS_INNER_ERROR;
-    }
-    ret = memcpy_s(cache_table + swap_cache_idx[i] * single_copy_size, cache_table_size,
-                   miss_value + i * single_copy_size, single_copy_size);
-    cache_table_size -= single_copy_size;
-    if (ret != EOK) {
-      KERNEL_LOG_ERROR("CacheSwapTable memcpy failed, result [%d].", ret);
-      return KERNEL_STATUS_INNER_ERROR;
-    }
-  }
-  return KERNEL_STATUS_OK;
-}
-
-uint32_t CacheSwapTableMsCpuKernel::DoCompute() {
-  std::map<int, std::function<uint32_t(std::vector<Tensor *> &, std::vector<Tensor *> &, int64_t &, int64_t &,
-                                       int64_t &, int &)>>
-    calls;
-  calls[DT_INT32] = CacheSwapTableTask<int32_t>;
-  calls[DT_INT64] = CacheSwapTableTask<int64_t>;
-
-  if (calls.find(indices_type_) == calls.end()) {
-    KERNEL_LOG_ERROR(
-      "CacheSwapTableMsCpuKernel op doesn't support indices tensor types: "
-      "[%s]",
-      DTypeStr(indices_type_).c_str());
-    return KERNEL_STATUS_PARAM_INVALID;
-  }
-
-  int type_size = GetSizeByDataType(param_type_);
-  return calls[indices_type_](inputs_, outputs_, batch_size_, output_size_, one_line_col_, type_size);
-}
-
-uint32_t CacheSwapTableMsCpuKernel::GetInputAndCheck(CpuKernelContext &ctx) {
-  KERNEL_LOG_INFO("GetInputAndCheck start!");
-  // get input Tensors
-  const uint32_t kNumInput = 3;
-  for (uint32_t i = 0; i < kNumInput; ++i) {
-    Tensor *tensor = ctx.Input(i);
-    KERNEL_CHECK_NULLPTR(tensor, KERNEL_STATUS_PARAM_INVALID, "Get input tensor[%d] failed", i)
-    inputs_.push_back(tensor);
-  }
-  // get output Tensors
-  const uint32_t kNumOutput = 1;
-  for (uint32_t i = 0; i < kNumOutput; ++i) {
-    Tensor *tensor = ctx.Output(i);
-    KERNEL_CHECK_NULLPTR(tensor, KERNEL_STATUS_PARAM_INVALID, "Get output tensor[%d] failed", i)
-    outputs_.push_back(tensor);
-  }
-  // get param type
-  param_type_ = static_cast<DataType>(inputs_[0]->GetDataType());
-  indices_type_ = static_cast<DataType>(inputs_[1]->GetDataType());
-  KERNEL_LOG_INFO("GetInputAndCheck success!");
-
-  std::shared_ptr<TensorShape> cache_table_shape = ctx.Input(0)->GetTensorShape();
-  std::shared_ptr<TensorShape> indices_shape = ctx.Input(1)->GetTensorShape();
-
-  for (int32_t i = 1; i < cache_table_shape->GetDims(); ++i) {
-    KERNEL_CHECK_ASSIGN_64S_MULTI(one_line_col_, cache_table_shape->GetDimSize(i), one_line_col_,
-                                  KERNEL_STATUS_PARAM_INVALID);
-  }
-  for (int32_t i = 0; i < indices_shape->GetDims(); ++i) {
-    KERNEL_CHECK_ASSIGN_64S_MULTI(batch_size_, indices_shape->GetDimSize(i), batch_size_, KERNEL_STATUS_PARAM_INVALID);
-  }
-  output_size_ = batch_size_ * one_line_col_;
-  return KERNEL_STATUS_OK;
-}
-
-uint32_t CacheSwapTableMsCpuKernel::Compute(CpuKernelContext &ctx) {
-  uint32_t res = GetInputAndCheck(ctx);
-  if (res != KERNEL_STATUS_OK) {
-    return res;
-  }
-
-  res = DoCompute();
-  if (res != KERNEL_STATUS_OK) {
-    KERNEL_LOG_ERROR("Compute failed");
-    return res;
-  }
-  return KERNEL_STATUS_OK;
-}
-REGISTER_CPU_KERNEL(kCacheSwapTable, CacheSwapTableMsCpuKernel);
-}  // namespace aicpu
@ -0,0 +1,213 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "extract_glimpse.h"
+#include <iostream>
+#include <random>
+#include "cpu_kernel_utils.h"
+#include "utils/eigen_tensor.h"
+#include "utils/kernel_util.h"
+using namespace std;
+random_device rd;
+mt19937 gen(rd());
+uniform_real_distribution<float> dis_uniform(0.0f, 255.0f);
+normal_distribution<float> dis_normal(10, 0.5);
+#define SHED 2048
+namespace {
+const uint32_t kOutputNum = 1;
+const uint32_t kInputNum = 3;
+const char *kExtractGlimpse = "ExtractGlimpse";
+}  // namespace
+namespace aicpu {
+uint32_t ExtractGlimpseCpuKernel::Compute(CpuKernelContext &ctx) {
+  KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "ExtractGlimpse check input and output number failed.");
+  KERNEL_HANDLE_ERROR(ExtractGlimpseCheck(ctx), "ExtractGlimpse check params failed.");
+  Tensor *x = ctx.Input(0);
+  Tensor *ss = ctx.Input(1);
+  Tensor *offsets = ctx.Input(2);
+  Tensor *y = ctx.Output(0);
+  AttrValue *centered = ctx.GetAttr("centered");
+  AttrValue *normalized = ctx.GetAttr("normalized");
+  AttrValue *uniform_noise = ctx.GetAttr("uniform_noise");
+  AttrValue *noise = ctx.GetAttr("noise");
+  float *x_data = (float *)x->GetData();
+  int32_t *ss_data = (int32_t *)ss->GetData();
+  float *offsets_data = (float *)offsets->GetData();
+  float *y_data = (float *)y->GetData();
+  uint64_t offsets_cnt = offsets->GetTensorShape()->GetDimSize(0);
+  uint64_t batch_cnt = x->GetTensorShape()->GetDimSize(0);
+  KERNEL_CHECK_FALSE(offsets_cnt == batch_cnt, KERNEL_STATUS_PARAM_INVALID, "offsets should equal to batches")
+  int64_t image_height = x->GetTensorShape()->GetDimSize(1);
+  int64_t image_width = x->GetTensorShape()->GetDimSize(2);
+  int64_t channels = x->GetTensorShape()->GetDimSize(3);
+  uint64_t g_height = ss_data[0], g_width = ss_data[1];
+  uint64_t size1 = image_width * image_height * channels;
+  uint64_t size2 = image_width * channels;
+  uint64_t size3 = g_height * g_width * channels;
+  uint64_t size4 = size3 / g_height;
+  int64_t g_size = g_width * g_height;
+  if (batch_cnt > SHED) {
+    uint32_t min_core = 1;
+    uint64_t max_core = std::max(min_core, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
+    max_core = min(max_core, (uint64_t)batch_cnt);
+    auto fun = [&](size_t st, size_t ed) {
+      for (auto i = st; i < ed; i++) {
+        float x = offsets_data[i << 1], y = offsets_data[1 + (i << 1)];
+        if (normalized->GetBool()) {
+          x *= image_height;
+          y *= image_width;
+        }
+        if (centered->GetBool()) {
+          x /= 2.0f;
+          y /= 2.0f;
+          x += image_height / 2.0f;
+          y += image_width / 2.0f;
+        }
+        x -= g_height / 2.0f;
+        y -= g_width / 2.0f;
+        for (int64_t v = 0; v < g_size; v++) {
+          int64_t j = v / g_width, k = v % g_width;
+          int64_t a = (int64_t)x + j, b = (int64_t)y + k;
+          uint64_t pos_y = i * size3 + j * size4 + k * channels;
+          if (a < 0 || a >= image_height || b < 0 || b >= image_width) {
+            for (int u = 0; u < channels; u++) {
+              if (uniform_noise->GetBool())
+                y_data[pos_y + u] = dis_uniform(gen);
+              else if (noise->GetString() == "zero")
+                y_data[pos_y + u] = 0.0f;
+              else if (noise->GetString() == "gaussian")
+                y_data[pos_y + u] = max(0.0f, dis_normal(gen));
+              else {
+                KERNEL_LOG_ERROR("noise type [%s] unsupported.", noise->GetString().c_str());
+                return KERNEL_STATUS_PARAM_INVALID;
+              }
+            }
+            continue;
+          }
+          uint64_t pos_x = i * size1 + a * size2 + b * channels;
+          for (int u = 0; u < channels; u++) {
+            y_data[pos_y + u] = x_data[pos_x + u];
+          }
+        }
+      }
+      return KERNEL_STATUS_OK;
+    };
+    KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, batch_cnt, batch_cnt / max_core, fun),
+                        "ExtractGlimpse Compute failed.");
+  } else {
+    for (uint64_t i = 0; i < batch_cnt; i++) {
+      float x = offsets_data[i << 1], y = offsets_data[1 + (i << 1)];
+      if (normalized->GetBool()) {
+        x *= image_height;
+        y *= image_width;
+      }
+      if (centered->GetBool()) {
+        x /= 2.0f;
+        y /= 2.0f;
+        x += image_height / 2.0f;
+        y += image_width / 2.0f;
+      }
+      x -= g_height / 2.0f;
+      y -= g_width / 2.0f;
+      if (g_size < SHED) {
+        for (int64_t v = 0; v < g_size; v++) {
+          int64_t j = v / g_width, k = v % g_width;
+          int64_t a = (int64_t)x + j, b = (int64_t)y + k;
+          uint64_t pos_y = i * size3 + j * size4 + k * channels;
+          if (a < 0 || a >= image_height || b < 0 || b >= image_width) {
+            for (int u = 0; u < channels; u++) {
+              if (uniform_noise->GetBool())
+                y_data[pos_y + u] = dis_uniform(gen);
+              else if (noise->GetString() == "zero")
+                y_data[pos_y + u] = 0.0f;
+              else if (noise->GetString() == "gaussian")
+                y_data[pos_y + u] = max(0.0f, dis_normal(gen));
+              else {
+                KERNEL_LOG_ERROR("noise type [%s] unsupported.", noise->GetString().c_str());
+                return KERNEL_STATUS_PARAM_INVALID;
+              }
+            }
+            continue;
+          }
+          uint64_t pos_x = i * size1 + a * size2 + b * channels;
+          for (int u = 0; u < channels; u++) {
+            y_data[pos_y + u] = x_data[pos_x + u];
+          }
+        }
+      } else {
+        uint32_t min_core = 1;
+        uint64_t max_core = std::max(min_core, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
+        max_core = min(max_core, (uint64_t)g_size);
+        auto fun = [&](size_t st, size_t ed) {
+          for (auto v = st; v < ed; v++) {
+            int64_t j = v / g_width, k = v % g_width;
+            int64_t a = (int64_t)x + j, b = (int64_t)y + k;
+            uint64_t pos_y = i * size3 + j * size4 + k * channels;
+            if (a < 0 || a >= image_height || b < 0 || b >= image_width) {
+              for (int u = 0; u < channels; u++)
+                if (uniform_noise->GetBool())
+                  y_data[pos_y + u] = dis_uniform(gen);
+                else if (noise->GetString() == "zero")
+                  y_data[pos_y + u] = 0.0f;
+                else if (noise->GetString() == "gaussian")
+                  y_data[pos_y + u] = max(0.0f, dis_normal(gen));
+                else {
+                  KERNEL_LOG_ERROR("noise type [%s] unsupported.", noise->GetString().c_str());
+                  return KERNEL_STATUS_PARAM_INVALID;
+                }
+              continue;
+            }
+            uint64_t pos_x = i * size1 + a * size2 + b * channels;
+            for (int u = 0; u < channels; u++) {
+              y_data[pos_y + u] = x_data[pos_x + u];
+            }
+          }
+          return KERNEL_STATUS_OK;
+        };
+        KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, g_size, g_size / max_core, fun),
+                            "ExtractGlimpse Compute failed.");
+      }
+    }
+  }
+  return KERNEL_STATUS_OK;
+}
+uint32_t ExtractGlimpseCpuKernel::ExtractGlimpseCheck(CpuKernelContext &ctx) {
+  Tensor *x = ctx.Input(0);
+  Tensor *ss = ctx.Input(1);
+  Tensor *offsets = ctx.Input(2);
+  Tensor *y = ctx.Output(0);
+  AttrValue *centered = ctx.GetAttr("centered");
+  AttrValue *normalized = ctx.GetAttr("normalized");
+  AttrValue *uniform_noise = ctx.GetAttr("uniform_noise");
+  AttrValue *noise = ctx.GetAttr("noise");
+  KERNEL_CHECK_NULLPTR(x, KERNEL_STATUS_PARAM_INVALID, "Get input 0 failed.")
+  KERNEL_CHECK_NULLPTR(ss, KERNEL_STATUS_PARAM_INVALID, "Get input 1 failed.")
+  KERNEL_CHECK_NULLPTR(offsets, KERNEL_STATUS_PARAM_INVALID, "Get input 2 failed.")
+  KERNEL_CHECK_NULLPTR(y, KERNEL_STATUS_PARAM_INVALID, "Get output 0 failed.")
+  KERNEL_CHECK_NULLPTR(centered, KERNEL_STATUS_PARAM_INVALID, "Get attribute centered failed.")
+  KERNEL_CHECK_NULLPTR(normalized, KERNEL_STATUS_PARAM_INVALID, "Get attribute normalized failed.")
+  KERNEL_CHECK_NULLPTR(uniform_noise, KERNEL_STATUS_PARAM_INVALID, "Get attribute uniform_noise failed.")
+  KERNEL_CHECK_NULLPTR(noise, KERNEL_STATUS_PARAM_INVALID, "Get attribute noise failed.")
+  KERNEL_CHECK_NULLPTR(x->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input 0 data failed.")
+  KERNEL_CHECK_NULLPTR(ss->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input 1 data failed.")
+  KERNEL_CHECK_NULLPTR(offsets->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input 2 data failed.")
+  KERNEL_CHECK_NULLPTR(y->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output 0 data failed.")
+  KERNEL_CHECK_FALSE(x->GetDataType() == DT_FLOAT && ss->GetDataType() == DT_INT32 &&
+                       offsets->GetDataType() == DT_FLOAT && y->GetDataType() == DT_FLOAT,
+                     KERNEL_STATUS_PARAM_INVALID, "data type error.")
+  return KERNEL_STATUS_OK;
+}
+REGISTER_CPU_KERNEL(kExtractGlimpse, ExtractGlimpseCpuKernel);
+}  // namespace aicpu
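For orientation, the offset handling this new kernel applies per batch entry can be read in isolation. A minimal sketch (hypothetical helper, not part of the kernel above): `normalized` offsets are scaled from relative units to pixels, `centered` offsets are re-based from the image center, and the result is shifted back by half the glimpse extent so the window is centered on the requested point.

#include <cstdint>
#include <utility>

// Hypothetical reference for the offset math in ExtractGlimpseCpuKernel::Compute:
// returns the top-left (row, col) of a g_height x g_width glimpse centered at the offset.
std::pair<int64_t, int64_t> GlimpseTopLeft(float off_row, float off_col, int64_t image_height, int64_t image_width,
                                           int64_t g_height, int64_t g_width, bool normalized, bool centered) {
  if (normalized) {  // offsets arrive in relative units, scale to pixels
    off_row *= image_height;
    off_col *= image_width;
  }
  if (centered) {  // offsets are measured from the image center, not the corner
    off_row = off_row / 2.0f + image_height / 2.0f;
    off_col = off_col / 2.0f + image_width / 2.0f;
  }
  off_row -= g_height / 2.0f;  // center the window on the offset point
  off_col -= g_width / 2.0f;
  return {static_cast<int64_t>(off_row), static_cast<int64_t>(off_col)};
}

Pixels of the window that fall outside the image are then filled with uniform, zero, or clipped Gaussian noise, exactly as the branches above do.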
@ -0,0 +1,35 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AICPU_IMPL_EXTRACT_GLIMPSE_H_
+#define AICPU_IMPL_EXTRACT_GLIMPSE_H_
+
+#include "cpu_ops_kernel.h"
+
+namespace aicpu {
+class ExtractGlimpseCpuKernel : public CpuKernel {
+ public:
+  ExtractGlimpseCpuKernel() = default;
+  ~ExtractGlimpseCpuKernel() override = default;
+
+ protected:
+  uint32_t Compute(CpuKernelContext &ctx) override;
+
+ private:
+  static uint32_t ExtractGlimpseCheck(CpuKernelContext &ctx);
+};
+}  // namespace aicpu
+#endif
@ -124,27 +124,6 @@ double FFTWithSizeCpuKernel::Getnormalized(int64_t n, std::string normalized, bo
     if (normalized == "backward") result = 1.0 / n;
     if (normalized == "ortho") result = 1.0 / sqrt((double)n);
   }
-  // if (signal_ndim == 1) {
-  //   result = sqrt((double)out_shape[out_shape.size() - 1]);
-  // } else if (signal_ndim == 2) {
-  //   result = sqrt((double)(out_shape[out_shape.size() - 1] *
-  //                          out_shape[out_shape.size() - 2]));
-  // } else {
-  //   result = sqrt((double)(out_shape[out_shape.size() - 1] *
-  //                          out_shape[out_shape.size() - 2] *
-  //                          out_shape[out_shape.size() - 3]));
-  // }
-  // if (is_reverse) {
-  //   if (result == 0) {
-  //     KERNEL_LOG_ERROR("DivideByZeroExcepiton");
-  //   }
-  //   result = 1.0 / result;
-  // }
-  // KERNEL_LOG_DEBUG(
-  //   "FFTWithSizeCpuKernel[GetNormalized], "
-  //   "input_shape[%s] normalize[%s]. "
-  //   "is_reverse: [%d]. norm_:[%lf]",
-  //   VectorToString(out_shape).c_str(), normalized, is_reverse, result);
   std::cout << "result = " << result << std::endl;
   return result;
 }
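For reference on the branches kept above: they follow the common FFT normalization conventions, where "backward" scales by 1/n, "ortho" scales by 1/sqrt(n), and the usual third mode, "forward", applies no factor on this side of the transform. A minimal sketch of that mapping (hypothetical helper, assuming those three modes):

#include <cmath>
#include <cstdint>
#include <string>

// Hypothetical helper mirroring Getnormalized's retained branches.
double NormFactor(const std::string &normalized, int64_t n) {
  if (normalized == "backward") return 1.0 / n;  // scale applied on the inverse transform
  if (normalized == "ortho") return 1.0 / std::sqrt(static_cast<double>(n));  // symmetric scaling both ways
  return 1.0;  // assumed "forward": no scaling on this side
}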
@ -350,14 +329,7 @@ uint32_t FFTWithSizeCpuKernel::FFTWithSizeCompute(CpuKernelContext &ctx, bool on
   if (is_real) {
     inverse = real_inverse;
   }
-  std::cout << out;
-  std::cout << "===========";
-  // if
-  // std::vector<int64_t> out_shape(out.dimensions().begin(),
-  //                                out.dimensions().end());
-  // if (is_real && !inverse) {
-  //   out_shape.back() = x_shape.back();
-  // }
   std::cout << out;
   auto cout = x_shape_ptr->NumElements();
   auto norm = Getnormalized(cout, normalized, inverse);
@ -1,18 +1,3 @@
-/**
- * Copyright 2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
 #ifndef AICPU_KERNELS_NORMALIZED_FFTWITHSIZE_H_
 #define AICPU_KERNELS_NORMALIZED_FFTWITHSIZE_H_
 
@ -37,4 +22,4 @@ class FFTWithSizeCpuKernel : public CpuKernel {
   static double Getnormalized(int64_t n, std::string normalized, bool is_reverse);
 };
 }  // namespace aicpu
 #endif
@ -15,129 +15,160 @@
 */
 
 #include "fill.h"
+#include "cpu_kernel_utils.h"
 #include "utils/eigen_tensor.h"
 #include "utils/kernel_util.h"
 
 namespace {
-const char *const kFill = "Fill";
-}
+const uint32_t kOutputNum = 1;
+const uint32_t kInputNum = 2;
+const char *kFill = "Fill";
+const char *kFillV2 = "FillV2";
+const int64_t kParallelDataNumCriticalPoint1 = 128 * 1024;
+const int64_t kParallelDataNumCriticalPoint2 = 2 * 1024 * 1024;
+
+#define CALCULATE_DIMS_DTYPE_CASE(DTYPE, TYPE)                        \
+  case (DTYPE): {                                                     \
+    if (CalculateDims<TYPE>(dims_tensor, dims) != KERNEL_STATUS_OK) { \
+      KERNEL_LOG_ERROR("Fill kernel calculate dims failed.");         \
+      return KERNEL_STATUS_PARAM_INVALID;                             \
+    }                                                                 \
+    break;                                                            \
+  }
+
+#define FILL_GENERATE_DTYPE_CASE(DTYPE, TYPE)    \
+  case (DTYPE): {                                \
+    FillOutput<TYPE>(ctx, value_tensor, output); \
+    break;                                       \
+  }
+}  // namespace
 
 namespace aicpu {
-template <typename T>
-void FillGenerateCase(Tensor *&value_tensor, Tensor *&output) {
-  auto value = *(reinterpret_cast<T *>(value_tensor->GetData()));
-  if (AddrAlignedCheck(output->GetData())) {
-    Eigen::TensorMap<Eigen::Tensor<T, 1>, Eigen::Aligned> eigen_output(static_cast<T *>(output->GetData()),
-                                                                       output->GetTensorShape()->NumElements());
-    eigen_output.setConstant(value);
-  } else {
-    Eigen::TensorMap<Eigen::Tensor<T, 1>, Eigen::Unaligned> eigen_output(static_cast<T *>(output->GetData()),
-                                                                         output->GetTensorShape()->NumElements());
-    eigen_output.setConstant(value);
-  }
-}
-
-uint32_t FillCpuKernel::GetDimsByType(CpuKernelContext &ctx) {
-  dims.clear();
+uint32_t FillCpuKernel::Compute(CpuKernelContext &ctx) {
+  // Check the input/output counts and that the input/output tensor attributes are not null
+  KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "Check input and output number failed.");
+  std::vector<int64_t> dims;
   Tensor *dims_tensor = ctx.Input(0);
-  KERNEL_CHECK_NULLPTR(dims_tensor, KERNEL_STATUS_PARAM_INVALID, "Get dims input failed")
-  uint32_t ret;
   auto dims_dtype = dims_tensor->GetDataType();
   switch (dims_dtype) {
-    case (DT_INT32):
-      ret = CalcDims<int32_t>(dims_tensor, dims);
-      break;
-    case (DT_INT64):
-      ret = CalcDims<int64_t>(dims_tensor, dims);
-      break;
+    CALCULATE_DIMS_DTYPE_CASE(DT_INT32, int32_t)
+    CALCULATE_DIMS_DTYPE_CASE(DT_INT64, int64_t)
     default:
-      KERNEL_LOG_ERROR(
-        "Fill kernel dims data_type [%u] not support, support data_types: "
-        "DT_INT32, DT_INT64",
-        dims_dtype);
+      KERNEL_LOG_ERROR("Fill kernel dims data_type [%u] not support, support data_types: DT_INT32, DT_INT64.",
                       dims_dtype);
      return KERNEL_STATUS_PARAM_INVALID;
  }
-  if (ret != KERNEL_STATUS_OK) {
-    KERNEL_LOG_ERROR("Fill kernel calculate dims failed");
-  }
-  return ret;
-}
-
-uint32_t FillCpuKernel::Compute(CpuKernelContext &ctx) {
-  uint32_t check = GetDimsByType(ctx);
-  if (check != KERNEL_STATUS_OK) {
-    return check;
-  }
   Tensor *value_tensor = ctx.Input(1);
-  KERNEL_CHECK_NULLPTR(value_tensor, KERNEL_STATUS_PARAM_INVALID, "Get value input failed")
-  KERNEL_CHECK_NULLPTR(value_tensor->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get value input data failed")
-  KERNEL_CHECK_NULLPTR(value_tensor->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get value input shape failed")
-  if (!value_tensor->GetTensorShape()->GetDimSizes().empty()) {
+  if (value_tensor->NumElements() != 1) {
     KERNEL_LOG_ERROR("Fill kernel value input is not a scalar.");
     return KERNEL_STATUS_PARAM_INVALID;
   }
 
   Tensor *output = ctx.Output(0);
-  KERNEL_CHECK_NULLPTR(output, KERNEL_STATUS_PARAM_INVALID, "Get output failed")
-  KERNEL_CHECK_NULLPTR(output->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output data failed")
-  KERNEL_CHECK_NULLPTR(output->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get output shape failed")
-  if (output->GetTensorShape()->GetDimSizes() != dims) {
+  if (output->GetTensorShape()->GetDims() != static_cast<int64_t>(dims.size())) {
     KERNEL_LOG_ERROR("Fill kernel output shape not matched.");
     return KERNEL_STATUS_PARAM_INVALID;
   }
+  if (output->GetTensorShape()->GetDimSizes() != dims) {
+    output->GetTensorShape()->SetDimSizes(dims);
+  }
 
   auto input_dtype = value_tensor->GetDataType();
   auto output_dtype = output->GetDataType();
   if (input_dtype != output_dtype) {
-    KERNEL_LOG_ERROR("Fill kernel data type not matched, value input dtype [%u], output dtype [%u].", input_dtype,
-                     output_dtype);
+    KERNEL_LOG_ERROR(
+      "Fill kernel data type not matched, value input dtype [%u], output dtype [%u], support data_types: "
+      "DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, DT_INT32, DT_INT64, DT_INT8, DT_UINT16, "
+      "DT_UINT32, DT_UINT64, DT_UINT8, DT_BOOL.",
+      input_dtype, output_dtype);
     return KERNEL_STATUS_PARAM_INVALID;
   }
 
-  std::map<int, std::function<void(Tensor *&, Tensor *&)>> calls;
-  calls[DT_INT8] = FillGenerateCase<int8_t>;
-  calls[DT_UINT8] = FillGenerateCase<uint8_t>;
-  calls[DT_INT16] = FillGenerateCase<int16_t>;
-  calls[DT_UINT16] = FillGenerateCase<uint16_t>;
-  calls[DT_INT32] = FillGenerateCase<int32_t>;
-  calls[DT_UINT32] = FillGenerateCase<uint32_t>;
-  calls[DT_INT64] = FillGenerateCase<int64_t>;
-  calls[DT_UINT64] = FillGenerateCase<uint64_t>;
-  calls[DT_BOOL] = FillGenerateCase<bool>;
-  calls[DT_FLOAT16] = FillGenerateCase<Eigen::half>;
-  calls[DT_FLOAT] = FillGenerateCase<float>;
-  calls[DT_DOUBLE] = FillGenerateCase<double>;
-
-  if (calls.find(output_dtype) == calls.end()) {
-    KERNEL_LOG_ERROR("Fill kernel data type [%u] not support", output_dtype);
-    return KERNEL_STATUS_PARAM_INVALID;
+  switch (output_dtype) {
+    FILL_GENERATE_DTYPE_CASE(DT_INT8, int8_t)
+    FILL_GENERATE_DTYPE_CASE(DT_UINT8, uint8_t)
+    FILL_GENERATE_DTYPE_CASE(DT_INT16, int16_t)
+    FILL_GENERATE_DTYPE_CASE(DT_UINT16, uint16_t)
+    FILL_GENERATE_DTYPE_CASE(DT_INT32, int32_t)
+    FILL_GENERATE_DTYPE_CASE(DT_UINT32, uint32_t)
+    FILL_GENERATE_DTYPE_CASE(DT_INT64, int64_t)
+    FILL_GENERATE_DTYPE_CASE(DT_UINT64, uint64_t)
+    FILL_GENERATE_DTYPE_CASE(DT_BOOL, bool)
+    FILL_GENERATE_DTYPE_CASE(DT_FLOAT16, Eigen::half)
+    FILL_GENERATE_DTYPE_CASE(DT_FLOAT, float)
+    FILL_GENERATE_DTYPE_CASE(DT_DOUBLE, double)
+    FILL_GENERATE_DTYPE_CASE(DT_COMPLEX64, std::complex<float>)
+    FILL_GENERATE_DTYPE_CASE(DT_COMPLEX128, std::complex<double>)
+    default:
+      KERNEL_LOG_ERROR(
+        "Fill kernel data type [%u] not support, not support data_types: DT_STRING, DT_DUAL_SUB_INT8, "
+        "DT_DUAL_SUB_UINT8, DT_QUINT8, DT_QINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_RESOURCE, DT_STRING_REF, "
+        "DT_DUAL, DT_UNDEFINED.",
+        output_dtype);
+      return KERNEL_STATUS_PARAM_INVALID;
   }
-  calls[output_dtype](value_tensor, output);
   return KERNEL_STATUS_OK;
 }
 
 template <typename T>
-uint32_t FillCpuKernel::CalcDims(const Tensor *dims_tensor, std::vector<int64_t> &dim_vec) {
+uint32_t FillCpuKernel::CalculateDims(const Tensor *dims_tensor, std::vector<int64_t> &dims) {
+  // Get the element count of the first input, which is a one-dimensional tensor (dims_tensor)
   uint64_t data_num = dims_tensor->GetDataSize() / sizeof(T);
-  if (data_num == 0) {
-    KERNEL_LOG_INFO("Fill kernel: dims is empty, fill scalar output.");
-    return KERNEL_STATUS_OK;
-  }
-
-  KERNEL_CHECK_NULLPTR(dims_tensor->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get dims data failed")
+  auto dims_data = reinterpret_cast<const T *>(dims_tensor->GetData());
   for (uint64_t i = 0; i < data_num; i++) {
-    auto dim = *(reinterpret_cast<const T *>(dims_tensor->GetData()) + i);
+    auto dim = *(dims_data + i);
    if (dim < 0) {
-      KERNEL_LOG_ERROR("Fill kernel: input dim [%llu] is negative, value=[%lld]", i, static_cast<int64_t>(dim));
+      KERNEL_LOG_ERROR("dims input dim [%llu] is negative, value=[%lld].", i, static_cast<int64_t>(dim));
      return KERNEL_STATUS_PARAM_INVALID;
    }
-    // zero dim is different from empty dim.
    if (dim == 0) {
-      KERNEL_LOG_INFO("Fill kernel: input dim [%llu] is zero", i);
+      KERNEL_LOG_INFO("dims input dim [%llu] is zero.", i);
+      dims.clear();
+      break;
    }
-    dim_vec.emplace_back(dim);
+    dims.emplace_back(dim);
  }
 
  return KERNEL_STATUS_OK;
 }
 
+template <typename T>
+void FillCpuKernel::FillOutput(CpuKernelContext &ctx, const Tensor *value_tensor, Tensor *output) {
+  auto value = reinterpret_cast<T *>(value_tensor->GetData());
+  auto output_data = reinterpret_cast<T *>(output->GetData());
+  int64_t data_num = output->NumElements();
+
+  if (data_num >= kParallelDataNumCriticalPoint1) {
+    uint32_t min_core_num = 1;
+    uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx));
+
+    if (data_num <= kParallelDataNumCriticalPoint2) {
+      max_core_num = std::min(max_core_num, 4U);
+    }
+
+    if (max_core_num > data_num) {
+      max_core_num = data_num;
+    }
+
+    auto shared_fill = [&](int64_t start, int64_t end) { SpecialFillOutput<T>(start, end, output_data, value); };
+
+    CpuKernelUtils::ParallelFor(ctx, data_num, data_num / max_core_num, shared_fill);
+  } else {
+    SpecialFillOutput<T>(0, data_num, output_data, value);
+  }
+}
+
+template <typename T>
+void FillCpuKernel::SpecialFillOutput(int64_t start, int64_t end, T *output_data, const T *value) {
+  for (int64_t i = start; i < end; i++) {
+    *(output_data + i) = *(value);
+  }
+}
+
 REGISTER_CPU_KERNEL(kFill, FillCpuKernel);
+REGISTER_CPU_KERNEL(kFillV2, FillCpuKernel);
 }  // namespace aicpu
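The two new thresholds follow a sizing pattern common to these kernels: workloads below kParallelDataNumCriticalPoint1 (128K elements) stay single-threaded, workloads up to kParallelDataNumCriticalPoint2 (2M elements) are capped at four cores, and larger workloads may use every core. A minimal sketch of that policy in isolation (hypothetical helper; the real kernel obtains its core count from CpuKernelUtils::GetCPUNum):

#include <algorithm>
#include <cstdint>

// Hypothetical sketch of the core-count policy used by FillOutput above.
uint32_t PickCoreNum(int64_t data_num, uint32_t cpu_num) {
  const int64_t kCriticalPoint1 = 128 * 1024;       // below this: run serially
  const int64_t kCriticalPoint2 = 2 * 1024 * 1024;  // below this: at most 4 cores
  if (data_num < kCriticalPoint1) return 1;
  uint32_t max_core_num = std::max(1U, cpu_num);
  if (data_num <= kCriticalPoint2) max_core_num = std::min(max_core_num, 4U);
  if (static_cast<int64_t>(max_core_num) > data_num) max_core_num = static_cast<uint32_t>(data_num);
  return max_core_num;
}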
@ -1,5 +1,5 @@
 /**
- * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+ * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -14,8 +14,8 @@
 * limitations under the License.
 */
 
-#ifndef AICPU_KERNELS_NORMALIZED_FILL_H
-#define AICPU_KERNELS_NORMALIZED_FILL_H
+#ifndef AICPU_KERNELS_NORMALIZED_FILL_H_
+#define AICPU_KERNELS_NORMALIZED_FILL_H_
 
 #include "cpu_ops_kernel.h"
 
@ -23,21 +23,18 @@ namespace aicpu {
 class FillCpuKernel : public CpuKernel {
  public:
   FillCpuKernel() = default;
-  ~FillCpuKernel() override = default;
+  ~FillCpuKernel() = default;
   uint32_t Compute(CpuKernelContext &ctx) override;
 
  private:
-  uint32_t GetDimsByType(CpuKernelContext &ctx);
-  /**
-   * @brief calc dims from input dims tensor
-   * @param dims_tensor input dims tensor
-   * @param dims output shape dims
-   * @return status if success
-   */
   template <typename T>
-  uint32_t CalcDims(const Tensor *dims_tensor, std::vector<int64_t> &dims);
+  uint32_t CalculateDims(const Tensor *dims_tensor, std::vector<int64_t> &dims);
 
-  std::vector<int64_t> dims;
+  template <typename T>
+  void FillOutput(CpuKernelContext &ctx, const Tensor *value_tensor, Tensor *output);
+
+  template <typename T>
+  void SpecialFillOutput(int64_t start, int64_t end, T *output_data, const T *value);
 };
 }  // namespace aicpu
-#endif  // AICPU_KERNELS_NORMALIZED_FILL_H_
+#endif
@ -0,0 +1,132 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "log_normal_reverse.h"
+#include <random>
+#include <set>
+#include "cpu_kernel_utils.h"
+#include "cpu_ops_kernel.h"
+#include "utils/eigen_tensor.h"
+#include "utils/kernel_util.h"
+#include <ctime>
+#include <iostream>
+
+#include "Eigen/Core"
+using namespace std;
+using namespace Eigen;
+
+namespace {
+const uint32_t kNumInput = 1;
+const uint32_t kNumOutput = 1;
+
+const char *kLogNormalReverse = "LogNormalReverse";
+const int64_t kParallelDataNumSameShape = 16 * 1024;
+const int64_t kParallelDataNumMid = 128 * 1024;
+}  // namespace
+namespace aicpu {
+uint32_t LogNormalReverseCpuKernel::GetInputAndCheck(CpuKernelContext &ctx) {
+  KERNEL_HANDLE_ERROR(NormalCheck(ctx, kNumInput, kNumOutput), "LogNormalReverse check input and output failed.");
+  // get and check input
+  Tensor *input = ctx.Input(0);
+  inputs_.push_back(input);
+
+  // get output Tensors
+  Tensor *output = ctx.Output(0);
+  outputs_.push_back(output);
+
+  return KERNEL_STATUS_OK;
+}
+
+template <typename T>
+uint32_t LogNormalReverseCpuKernel::DoCompute(CpuKernelContext &ctx) {
+  float input_mean = 1.0;
+  float input_std = 2.0;
+
+  auto mean_value = ctx.GetAttr("mean");
+  auto std_value = ctx.GetAttr("std");
+
+  if (mean_value != nullptr) {
+    input_mean = mean_value->GetFloat();
+  }
+  if (std_value != nullptr) {
+    input_std = std_value->GetFloat();
+  }
+
+  T *output_y = reinterpret_cast<T *>(outputs_[0]->GetData());
+
+  static default_random_engine random_engine(time(0));
+  static std::normal_distribution<float> normal_value(input_mean, input_std);
+
+  int64_t Nums = inputs_[0]->GetTensorShape()->NumElements();
+
+  int64_t data_num = Nums;
+  if (data_num >= kParallelDataNumSameShape) {
+    uint32_t max_core_num = std::max(1U, aicpu::CpuKernelUtils::GetCPUNum(ctx) - kResvCpuNum);
+
+    if (data_num <= kParallelDataNumMid) {
+      max_core_num = std::min(max_core_num, 4U);
+    }
+    if (max_core_num > data_num) {
+      max_core_num = data_num;
+    }
+
+    auto shared_lognormalreverse = [&](size_t start, size_t end) {
+      for (size_t i = start; i < end; i++) {
+        output_y[i] = static_cast<T>(std::exp(normal_value(random_engine)));
+      }
+    };
+
+    if (max_core_num == 0) {
+      max_core_num = 1;
+    }
+    CpuKernelUtils::ParallelFor(ctx, data_num, data_num / max_core_num, shared_lognormalreverse);
+  } else {
+    for (int64_t i = 0; i < Nums; i++) {
+      output_y[i] = static_cast<T>(std::exp(normal_value(random_engine)));
+    }
+  }
+  return KERNEL_STATUS_OK;
+}
+
+uint32_t LogNormalReverseCpuKernel::Compute(CpuKernelContext &ctx) {
+  uint32_t res = GetInputAndCheck(ctx);
+  if (res != KERNEL_STATUS_OK) {
+    return res;
+  }
+
+  DataType input_type{ctx.Input(0)->GetDataType()};
+  switch (input_type) {
+    case (DT_FLOAT16): {
+      DoCompute<Eigen::half>(ctx);
+      break;
+    }
+    case (DT_FLOAT): {
+      DoCompute<float>(ctx);
+      break;
+    }
+    default:
+      KERNEL_LOG_ERROR("[%s] Data type of input is not support, input data type is [%s].", ctx.GetOpType().c_str(),
+                       DTypeStr(input_type).c_str());
+      res = KERNEL_STATUS_PARAM_INVALID;
+  }
+  if (res != KERNEL_STATUS_OK) {
+    KERNEL_LOG_ERROR("log normal reverse failed");
+    return res;
+  }
+  return KERNEL_STATUS_OK;
+}
+REGISTER_CPU_KERNEL(kLogNormalReverse, LogNormalReverseCpuKernel);
+}  // namespace aicpu
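The kernel relies on the identity that exp(Z) is log-normally distributed when Z ~ Normal(mean, std). A self-contained sketch of the same sampling step (hypothetical helper, seeded explicitly rather than from time(0)):

#include <cmath>
#include <cstdint>
#include <random>
#include <vector>

// Minimal sketch of the sampling LogNormalReverseCpuKernel::DoCompute performs:
// draw from Normal(mean, stddev), then exponentiate each sample.
std::vector<float> SampleLogNormal(size_t n, float mean, float stddev, uint64_t seed) {
  std::mt19937_64 engine(seed);
  std::normal_distribution<float> normal(mean, stddev);
  std::vector<float> out(n);
  for (auto &v : out) v = std::exp(normal(engine));
  return out;
}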
@ -1,44 +1,38 @@
 /**
- * Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
+ * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef AICPU_KERNELS_NORMALIZED_CACHE_SWAP_TABLE_H
-#define AICPU_KERNELS_NORMALIZED_CACHE_SWAP_TABLE_H
+#ifndef AICPU_KERNELS_NORMALIZED_LOGNORMALREVERSE_H_
+#define AICPU_KERNELS_NORMALIZED_LOGNORMALREVERSE_H_
 
-#include <cmath>
-#include <vector>
 #include "cpu_ops_kernel.h"
 
 namespace aicpu {
-class CacheSwapTableMsCpuKernel : public CpuKernel {
+class LogNormalReverseCpuKernel : public CpuKernel {
  public:
-  ~CacheSwapTableMsCpuKernel() = default;
+  LogNormalReverseCpuKernel() = default;
+  ~LogNormalReverseCpuKernel() override = default;
   uint32_t Compute(CpuKernelContext &ctx) override;
 
  private:
-  uint32_t DoCompute();
+  template <typename T>
+  uint32_t DoCompute(CpuKernelContext &ctx);
   uint32_t GetInputAndCheck(CpuKernelContext &ctx);
 
-  int64_t batch_size_ = 1;
-  int64_t one_line_col_ = 1;
-  int64_t output_size_ = 1;
-
   std::vector<Tensor *> inputs_;
   std::vector<Tensor *> outputs_;
-
-  DataType param_type_ = DT_FLOAT;
-  DataType indices_type_ = DT_INT32;
 };
 }  // namespace aicpu
-#endif
+#endif  // AICPU_KERNELS_NORMALIZED_LOGNORMALREVERSE_H_
@ -21,7 +21,6 @@
 #include <iomanip>
 #include <iostream>
 #include <unsupported/Eigen/CXX11/Tensor>
-#include "kernel_util.h"
 #include "utils/kernel_util.h"
 #define NoneN 1000
 using namespace Eigen;
@ -436,4 +436,4 @@ uint32_t ResizeBicubicGradCpuKernel::Compute(CpuKernelContext &ctx) {
   return KERNEL_STATUS_OK;
 }
 REGISTER_CPU_KERNEL(kResizeBicubicGrad, ResizeBicubicGradCpuKernel);
 }  // namespace aicpu
@ -35,4 +35,4 @@ class ResizeBicubicGradCpuKernel : public CpuKernel {
   uint32_t GetInputAndCheck(CpuKernelContext &ctx);
 };
 }  // namespace aicpu
 #endif
@ -0,0 +1,217 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "segment_max.h"
|
||||||
|
|
||||||
|
#include "cpu_kernel_utils.h"
|
||||||
|
#include "utils/eigen_tensor.h"
|
||||||
|
#include "utils/kernel_util.h"
|
||||||
|
#include "cpu_kernel/common/runtime_tensor_desc.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
const uint32_t kInputNum = 2;
|
||||||
|
const uint32_t kOutputNum = 1;
|
||||||
|
const char *kSegmentMax = "SegmentMax";
|
||||||
|
const int64_t kDataSize = 2 * 1024;
|
||||||
|
|
||||||
|
#define SEGMENTMAX_COMPUTE_CASE(DTYPE, TYPE1, TYPE2, CTX) \
|
||||||
|
case (DTYPE): { \
|
||||||
|
uint32_t result = SegmentMaxCompute<TYPE1, TYPE2>(CTX); \
|
||||||
|
if (result != KERNEL_STATUS_OK) { \
|
||||||
|
KERNEL_LOG_ERROR("SegmentMax kernel compute failed."); \
|
||||||
|
return result; \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SEGMENTMAX_COMPUTE_CASE_ALL(TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_INT8, int8_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_INT16, int16_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_INT32, int32_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_INT64, int64_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_UINT8, uint8_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_UINT16, uint16_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_UINT32, uint32_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_UINT64, uint64_t, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_FLOAT16, Eigen::half, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_FLOAT, float, TYPE, CTX) \
|
||||||
|
SEGMENTMAX_COMPUTE_CASE(DT_DOUBLE, double, TYPE, CTX)
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
namespace aicpu {
|
||||||
|
uint32_t SegmentMaxCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||||
|
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "SegmentMax check input and output number failed.");
|
||||||
|
auto data_type = ctx.Input(0)->GetDataType();
|
||||||
|
auto segment_ids_type = ctx.Input(1)->GetDataType();
|
||||||
|
switch (segment_ids_type) {
|
||||||
|
case DT_INT32: {
|
||||||
|
switch (data_type) {
|
||||||
|
SEGMENTMAX_COMPUTE_CASE_ALL(int32_t, ctx)
|
||||||
|
default:
|
||||||
|
KERNEL_LOG_ERROR("Input[0] data type[%s] not supported.", DTypeStr(data_type).c_str());
|
||||||
|
return KERNEL_STATUS_PARAM_INVALID;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case DT_INT64: {
|
||||||
|
switch (data_type) {
|
||||||
|
SEGMENTMAX_COMPUTE_CASE_ALL(int64_t, ctx)
|
||||||
|
default:
|
||||||
|
KERNEL_LOG_ERROR("Input[0] data type[%s] not supported.", DTypeStr(data_type).c_str());
|
||||||
|
return KERNEL_STATUS_PARAM_INVALID;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
KERNEL_LOG_ERROR("Input[1] data type[%s] not supported.", DTypeStr(segment_ids_type).c_str());
|
||||||
|
return KERNEL_STATUS_PARAM_INVALID;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return KERNEL_STATUS_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T1, typename T2>
uint32_t SegmentMaxCpuKernel::SegmentMaxCompute(CpuKernelContext &ctx) {
  Tensor *input_x_data = ctx.Input(0);
  auto input_x_addr = reinterpret_cast<T1 *>(input_x_data->GetData());
  auto input_x_shape = input_x_data->GetTensorShape();
  auto input_x_dims = input_x_shape->GetDimSizes();
  int64_t input_x_num = input_x_data->NumElements();
  Tensor *segment_ids_data = ctx.Input(1);
  auto segment_ids_data_addr = reinterpret_cast<T2 *>(segment_ids_data->GetData());
  int64_t segment_ids_data_num = segment_ids_data->NumElements();
  // segment_ids is ascending, so its last element determines the output's first dimension.
  input_x_dims[0] = segment_ids_data_addr[segment_ids_data_num - 1] + 1;
  Tensor *output_data = ctx.Output(0);
  auto output_data_addr = reinterpret_cast<T1 *>(output_data->GetData());
  auto output_data_shape = output_data->GetTensorShape();
  if (output_data_shape->GetDimSize(0) < input_x_dims[0]) {
    KERNEL_LOG_ERROR("The number of segments of the segmentation result of segment_ids is too large.");
    return KERNEL_STATUS_PARAM_INVALID;
  }
  output_data_shape->SetDimSizes(input_x_dims);
  if (!output_data->SetTensorShape(output_data_shape.get())) {
    KERNEL_LOG_ERROR("Set output shape failed.");
    return KERNEL_STATUS_INNER_ERROR;
  }
  // Zero-initialize the output; rows of skipped segment ids stay zero.
  int64_t output_data_num = output_data->NumElements();
  for (int64_t i = 0; i < output_data_num; i++) {
    output_data_addr[i] = static_cast<T1>(0);
  }
  std::vector<int64_t> segments_segment_ids;
  if (segment_ids_data_num != (input_x_data->GetTensorShape()->GetDimSize(0))) {
    KERNEL_LOG_ERROR("The amount of data for input[1] must be equal to the first dimension of input[0].");
    return KERNEL_STATUS_PARAM_INVALID;
  }
  if (segment_ids_data_addr[0] < 0) {
    KERNEL_LOG_ERROR("Input[1] must be nonnegative data.");
    return KERNEL_STATUS_PARAM_INVALID;
  }
  // Collect the run length of each distinct segment id while validating the ordering.
  int64_t seg_tmp = 1;
  for (int64_t i = 0; i < segment_ids_data_num - 1; i++) {
    if (segment_ids_data_addr[i] > segment_ids_data_addr[i + 1]) {
      KERNEL_LOG_ERROR("Input[1] must be an ascending ordered sequence.");
      return KERNEL_STATUS_PARAM_INVALID;
    }
    if (segment_ids_data_addr[i] == segment_ids_data_addr[i + 1]) {
      seg_tmp++;
    } else {
      segments_segment_ids.push_back(seg_tmp);
      seg_tmp = 1;
    }
    if (i == segment_ids_data_num - ge::DIM_SIZE2) {
      segments_segment_ids.push_back(seg_tmp);
    }
  }
  // Elements per input row, i.e. the stride between consecutive rows.
  const int64_t num_compare_per = input_x_num / (input_x_shape->GetDimSize(0));
  const int64_t num_segments_segment_ids = segments_segment_ids.size();
  if (num_segments_segment_ids < kDataSize) {
    for (int64_t i = 0; i < num_segments_segment_ids; i++) {
      int64_t count = segments_segment_ids[i];
      int64_t count_no = 0;
      for (int64_t j = 0; j < i; j++) {
        count_no += segments_segment_ids[j];
      }
      int64_t input_addr_base = count_no * num_compare_per;
      if (num_compare_per < kDataSize) {
        for (int64_t j = 0; j < num_compare_per; j++) {
          int64_t max_init_addr = input_addr_base + j;
          T1 max_value = input_x_addr[max_init_addr];
          for (int64_t k = 1; k < count; k++) {
            int64_t cmp_addr = max_init_addr + k * num_compare_per;  // int64_t, not int, to avoid narrowing on large tensors
            if (max_value < input_x_addr[cmp_addr]) {
              max_value = input_x_addr[cmp_addr];
            }
          }
          output_data_addr[segment_ids_data_addr[count_no] * num_compare_per + j] = max_value;
        }
      } else {
        // Wide rows: parallelize the per-column max across cores.
        uint32_t min_core_num = 1;
        int64_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
        if (max_core_num > num_compare_per) {
          max_core_num = num_compare_per;
        }
        auto shard_compute = [&](size_t start, size_t end) {
          for (size_t j = start; j < end; j++) {
            int64_t max_init_addr = input_addr_base + j;
            T1 max_value = input_x_addr[max_init_addr];
            for (int64_t k = 1; k < count; k++) {
              int64_t cmp_addr = max_init_addr + k * num_compare_per;
              if (max_value < input_x_addr[cmp_addr]) {
                max_value = input_x_addr[cmp_addr];
              }
            }
            output_data_addr[segment_ids_data_addr[count_no] * num_compare_per + j] = max_value;
          }
        };
        KERNEL_HANDLE_ERROR(
          CpuKernelUtils::ParallelFor(ctx, num_compare_per, num_compare_per / max_core_num, shard_compute),
          "SegmentMax Compute failed.");
      }
    }
  } else {
    // Many segments: parallelize across segments instead.
    uint32_t min_core_num_seg = 1;
    int64_t max_core_num_seg = std::max(min_core_num_seg, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
    if (max_core_num_seg > num_segments_segment_ids) {
      max_core_num_seg = num_segments_segment_ids;
    }
    auto shard_compute_seg = [&](size_t start_seg, size_t end_seg) {
      for (size_t i = start_seg; i < end_seg; i++) {
        int64_t count = segments_segment_ids[i];
        int64_t count_no = 0;
        for (size_t j = 0; j < i; j++) {
          count_no += segments_segment_ids[j];
        }
        int64_t input_addr_base = count_no * num_compare_per;
        for (int64_t j = 0; j < num_compare_per; j++) {
          int64_t max_init_addr = input_addr_base + j;
          T1 max_value = input_x_addr[max_init_addr];
          for (int64_t k = 1; k < count; k++) {
            int64_t cmp_addr = max_init_addr + k * num_compare_per;
            if (max_value < input_x_addr[cmp_addr]) {
              max_value = input_x_addr[cmp_addr];
            }
          }
          output_data_addr[segment_ids_data_addr[count_no] * num_compare_per + j] = max_value;
        }
      }
    };
    KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, num_segments_segment_ids,
                                                    num_segments_segment_ids / max_core_num_seg, shard_compute_seg),
                        "SegmentMax Compute failed.");
  }
  return KERNEL_STATUS_OK;
}

REGISTER_CPU_KERNEL(kSegmentMax, SegmentMaxCpuKernel);
} // namespace aicpu
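For readers new to segment reductions: the kernel above computes, for each distinct id in the sorted segment_ids vector, the elementwise maximum over the corresponding rows of the input, and leaves rows of skipped ids at zero. A minimal NumPy sketch of the same semantics (our illustration; names are ours, and it assumes sorted, nonnegative ids as the kernel's checks enforce):

    import numpy as np

    def segment_max(x, segment_ids):
        # x: (N, ...) data; segment_ids: (N,) sorted, nonnegative ints.
        num_segments = int(segment_ids[-1]) + 1
        out = np.zeros((num_segments,) + x.shape[1:], dtype=x.dtype)
        for seg in range(num_segments):
            rows = x[segment_ids == seg]
            if rows.size:  # ids may skip values; skipped rows stay zero
                out[seg] = rows.max(axis=0)
        return out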
@ -0,0 +1,35 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef AICPU_KERNELS_NORMALIZED_SEGMENTMAX_H_
#define AICPU_KERNELS_NORMALIZED_SEGMENTMAX_H_

#include "cpu_ops_kernel.h"

namespace aicpu {
class SegmentMaxCpuKernel : public CpuKernel {
 public:
  SegmentMaxCpuKernel() = default;
  ~SegmentMaxCpuKernel() override = default;

 protected:
  uint32_t Compute(CpuKernelContext &ctx) override;

 private:
  template <typename T1, typename T2>
  static uint32_t SegmentMaxCompute(CpuKernelContext &ctx);
};
} // namespace aicpu
#endif
@ -54,7 +54,6 @@ const AnfNodePtr AICpuLibSelectPass::Process(const FuncGraphPtr &graph, const An
   static const std::set<std::string> kMigrateAicpuKernelOps = {mindspore::kAdaptiveAvgPool2DV1OpName,
                                                                mindspore::kAdaptiveAvgPool2DGradV1OpName,
                                                                mindspore::kBucketizeOpName,
-                                                               mindspore::kCacheSwapTableOpName,
                                                                mindspore::kCauchyOpName,
                                                                mindspore::kChannelShuffleOpName,
                                                                mindspore::kCholeskyOpName,
@ -252,7 +251,10 @@ const AnfNodePtr AICpuLibSelectPass::Process(const FuncGraphPtr &graph, const An
                                                                mindspore::kLogicalXorOpName,
                                                                mindspore::kLogNormalReverseOpName,
                                                                mindspore::kBetaincOpName,
-                                                               mindspore::kLessEqualOpName};
+                                                               mindspore::kLessEqualOpName,
+                                                               mindspore::kHSVToRGBOpName,
+                                                               mindspore::kLuSolveOpName,
+                                                               mindspore::kExtractGlimpseOpName};

   static const std::string kEnvOpSoNames = "mindspore_aicpu_kernels";
   static const std::string kCpuKernelSoName = "mindspore_cpu_kernels";
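The two hunks above extend the allow-list of ops migrated to the unified CPU-kernel library; presumably the pass then selects the kernel .so by set membership. A hypothetical Python analogue of that routing decision (our sketch, not the pass's actual code):

    # Excerpt of kMigrateAicpuKernelOps; membership decides the library.
    MIGRATED_OPS = {"HSVToRGB", "LuSolve", "ExtractGlimpse"}

    def select_so(op_name):
        # Assumption: migrated ops load from mindspore_cpu_kernels,
        # everything else keeps the default mindspore_aicpu_kernels.
        return "mindspore_cpu_kernels" if op_name in MIGRATED_OPS else "mindspore_aicpu_kernels"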
@ -1160,7 +1160,7 @@ class PoissonNLLLoss(LossBase):
     Args:
         log_input (bool, optional): Whether use log input. Default: True.
         full (bool, optional): Whether include the Stirling approximation term in the loss calculation. Default: False.
-        eps (float, optional): Lower bound of `x` when calculating logarithms. Default: 1e-8.
+        eps (float, optional): Lower bound of `x` when calculating logarithms. Default: 1e-08.
         reduction (str, optional): Apply specific reduction method to the output:
             'none', 'mean', 'sum'. Default: 'mean'.
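Context for the eps doc fix: with log_input=True the per-element Poisson NLL is exp(x) - target*x; with log_input=False it is x - target*log(x + eps), which is where eps bounds the logarithm. A minimal NumPy sketch of the unreduced loss, omitting the optional Stirling term (illustrative, not MindSpore's implementation):

    import numpy as np

    def poisson_nll(x, target, log_input=True, eps=1e-08):
        if log_input:
            return np.exp(x) - target * x
        return x - target * np.log(x + eps)  # eps keeps log() away from zero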
@ -350,3 +350,5 @@ from .hsv_to_rgb import _hsv_to_rgb_aicpu
 from .im2col import _im2col_aicpu
 from .lu_solve import _lu_solve_aicpu
 from .relu_grad_v3 import _relu_grad_v3_aicpu
+from .resize_bicubic import _resize_bicubic_aicpu
+from .extract_glimpse import _extract_glimpse_aicpu
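For orientation, ExtractGlimpse (registered above) crops a fixed-size window from each image around a requested center. A stripped-down NumPy sketch assuming pixel-coordinate centers and zero padding (the real op also supports normalized/centered offsets and noise fill; names here are ours):

    import numpy as np

    def extract_glimpse(image, size, center):
        # image: (H, W, C); size: (h, w); center: (y, x) in pixels.
        h, w = size
        top, left = int(center[0]) - h // 2, int(center[1]) - w // 2
        out = np.zeros((h, w) + image.shape[2:], dtype=image.dtype)
        ys = slice(max(top, 0), min(top + h, image.shape[0]))
        xs = slice(max(left, 0), min(left + w, image.shape[1]))
        out[ys.start - top:ys.stop - top, xs.start - left:xs.stop - left] = image[ys, xs]
        return out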
@ -86,7 +86,7 @@ from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AccumulateNV2, AssignAdd, A
                        MatrixLogarithm, MatrixPower, MatrixSolve, MatrixTriangularSolve, ReduceStd, STFT,
                        NextAfter, Orgqr, Qr, RaggedRange, Digamma, Eig, EuclideanNorm, CompareAndBitpack, ComplexAbs,
                        CumulativeLogsumexp, Gcd, Trace, TridiagonalMatMul, TrilIndices, TriuIndices, Zeta,
-                       Roll, Lgamma, Logit)
+                       Roll, Lgamma, Logit, MatrixSolveLs)
 from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSparseLazyAdam, AdamNoUpdateParam,
                      ApplyMomentum, BatchNorm, BiasAdd, Conv2D, Conv3D, Conv2DTranspose, Conv3DTranspose,
                      DepthwiseConv2dNative,
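MatrixSolveLs, newly exported here, solves matrix least-squares problems; with an l2 regularizer this corresponds to minimizing ||A·x - b||² + l2·||x||² (following the TensorFlow op of the same name). A NumPy sketch of the regularized normal-equations solution for a single matrix (illustrative; the function name and l2 default are ours):

    import numpy as np

    def matrix_solve_ls(a, b, l2=0.0):
        # Regularized normal equations: x = (A^T A + l2*I)^-1 A^T b.
        gram = a.T @ a + l2 * np.eye(a.shape[1])
        return np.linalg.solve(gram, a.T @ b)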
@ -647,7 +647,8 @@ __all__ = [
     "SparseSlice",
     "ResizeLinear1D",
     "ResizeBicubic",
-    "Logit"
+    "Logit",
+    "MatrixSolveLs"
 ]

 __custom__ = [
@ -465,10 +465,10 @@ class NonMaxSuppressionWithOverlaps(Primitive):

     Examples:
         >>> overlaps = Tensor(np.array([[0.6964692, 0.28613934, 0.22685145, 0.5513148],
-        [0.71946895, 0.42310646, 0.9807642, 0.6848297],
-        [0.4809319, 0.39211753, 0.343178, 0.7290497],
-        [0.43857226, 0.059677895, 0.39804426, 0.7379954]
-        ]), mstype.float32)
+        ... [0.71946895, 0.42310646, 0.9807642, 0.6848297],
+        ... [0.4809319, 0.39211753, 0.343178, 0.7290497],
+        ... [0.43857226, 0.059677895, 0.39804426, 0.7379954]
+        ... ]), mstype.float32)
         >>> scores = Tensor(np.array([0.18249173, 0.17545176, 0.53155136, 0.53182757]), mstype.float32)
         >>> max_output_size = Tensor(4, mstype.int32)
         >>> overlap_threshold = Tensor(0.1, mstype.float32)
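The doctest fix above only restores the `...` continuation prompts. As a reminder of the op's behavior: it greedily keeps the highest-scoring boxes whose overlap with every already-kept box does not exceed the threshold, up to max_output_size. A NumPy sketch of that selection rule (the standard greedy algorithm, not MindSpore's kernel):

    import numpy as np

    def nms_with_overlaps(overlaps, scores, max_output_size, overlap_threshold):
        keep = []
        for i in np.argsort(-scores):  # visit boxes from highest score down
            if all(overlaps[i, j] <= overlap_threshold for j in keep):
                keep.append(int(i))
                if len(keep) == max_output_size:
                    break
        return np.array(keep)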
@ -260,6 +260,7 @@ class Addcdiv(Primitive):

     Raises:
         TypeError: If dtype of `x1`, `x2`, `value`, `input_data` is not tensor.
+        TypeError: If dtype of `x1`, `x2`, `value`, `input_data` are not the same.
         ValueError: If `x1` could not be broadcast to `x2`.
         ValueError: If `value` could not be broadcast to `x1/x2`.
         ValueError: If `input_data` could not be broadcast to `value*(x1/x2)`.
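For reference, Addcdiv computes input_data + value * (x1 / x2) elementwise with broadcasting, which is what the Raises clauses above guard. A one-line NumPy equivalent (illustrative):

    import numpy as np

    def addcdiv(input_data, x1, x2, value):
        return input_data + value * (x1 / x2)  # operands broadcast elementwise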
@ -303,9 +304,7 @@ class Addcmul(Primitive):

     Raises:
         TypeError: If dtype of `x1`, `x2`, `value`, `input_data` is not tensor.
-        TypeError: If dtype of `input_data` is not one of: float32, float16, int32.
-        TypeError: If dtype of `x1` or `x2` is not one of: float32, float16, int32.
-        TypeError: If dtype of `value` is not one of: float32, float16, int32.
+        TypeError: If dtype of `x1`, `x2`, `value`, `input_data` are not the same.
         ValueError: If `x1` could not be broadcast to `x2`.
         ValueError: If `value` could not be broadcast to `x1` * `x2`.
         ValueError: If `input_data` could not be broadcast to `value*(x1*x2)`.
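Addcmul is the multiplicative sibling: input_data + value * (x1 * x2), under the same broadcasting rules. Sketch (illustrative):

    def addcmul(input_data, x1, x2, value):
        return input_data + value * (x1 * x2)  # operands broadcast elementwise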