forked from mindspore-Ecosystem/mindspore
fix api migration issues and some doc issues
This commit is contained in:
parent
a4b2afc0ea
commit
2b78a9ecbd
|
@ -347,3 +347,5 @@ mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel
|
|||
mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_dense_cwise_utils.cc:aicpu::SparseDenseCwiseOpKernel<Op>::SparseDenseCwiseOpSpecialComputeComplex
|
||||
mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/sparse_dense_cwise_utils.cc:aicpu::SparseDenseCwiseOpKernel<Op>::SparseDenseCwiseOpBcastComputeComplex
|
||||
mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/resize_bicubic_grad.cc:aicpu::ResizeBicubicGrad
|
||||
mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/segment_max.cc:aicpu::SegmentMaxCpuKernel::SegmentMaxCompute
|
||||
mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/cpu_kernel/ms_kernel/extract_glimpse.cc:aicpu::ExtractGlimpseCpuKernel::Compute
|
||||
|
|
|
@ -312,6 +312,7 @@ constexpr auto kExpandDOpName = "ExpandD";
|
|||
constexpr auto kExpandDimsOpName = "ExpandDims";
|
||||
constexpr auto kExpOpName = "Exp";
|
||||
constexpr auto kExtractGlimpse = "ExtractGlimpse";
|
||||
constexpr auto kExtractGlimpseOpName = "ExtractGlimpse";
|
||||
constexpr auto kExtractImagePatchesOpName = "ExtractImagePatches";
|
||||
constexpr auto kEyeOpName = "Eye";
|
||||
constexpr auto kFastGeLUOpName = "FastGeLU";
|
||||
|
@ -468,6 +469,7 @@ constexpr auto kLSTMGradOpName = "LSTMGrad";
|
|||
constexpr auto kLSTMInputGradOpName = "LSTMInputGrad";
|
||||
constexpr auto kLSTMOpName = "LSTM";
|
||||
constexpr auto kLstsqOpName = "Lstsq";
|
||||
constexpr auto kLuSolveOpName = "LuSolve";
|
||||
constexpr auto kLuUnpackOpName = "LuUnpack";
|
||||
constexpr auto kLuUnpackGradOpName = "LuUnpackGrad";
|
||||
constexpr auto kMaskedFillOpName = "MaskedFill";
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
namespace ge {
|
||||
constexpr int64_t kMaxDimSize = 32;
|
||||
constexpr int64_t DIM_SIZE2 = 2;
|
||||
|
||||
#pragma pack(push, 1)
|
||||
struct RuntimeTensorDesc {
|
||||
|
|
|
@ -1,154 +0,0 @@
|
|||
/**
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "cache_swap_table.h"
|
||||
#include <securec.h>
|
||||
#include <map>
|
||||
#include "cpu_types.h"
|
||||
#include "kernel_log.h"
|
||||
#include "status.h"
|
||||
#include "utils/sparse_tensor.h"
|
||||
#include "utils/kernel_util.h"
|
||||
|
||||
namespace {
|
||||
const char *const kCacheSwapTable = "CacheSwapTable";
|
||||
}
|
||||
|
||||
namespace aicpu {
|
||||
template <typename T>
|
||||
uint32_t CacheSwapTableTask(std::vector<Tensor *> &inputs, std::vector<Tensor *> &outputs, int64_t batch_size,
|
||||
int64_t output_size, int64_t one_line_col, int type_size) {
|
||||
if (inputs.size() == 0 || outputs.size() == 0) {
|
||||
KERNEL_LOG_ERROR("CacheSwapTable input or output is empty.");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
|
||||
char *cache_table = reinterpret_cast<char *>(inputs[0]->GetData());
|
||||
T *swap_cache_idx = reinterpret_cast<T *>(inputs[1]->GetData());
|
||||
uint64_t swap_cache_idx_size = inputs[1]->GetDataSize();
|
||||
char *miss_value = reinterpret_cast<char *>(inputs[2]->GetData());
|
||||
|
||||
char *old_value = reinterpret_cast<char *>(outputs[0]->GetData());
|
||||
|
||||
errno_t ret = memset_s(old_value, static_cast<size_t>(output_size * type_size), 0x00,
|
||||
static_cast<size_t>(output_size * type_size));
|
||||
if (ret != EOK) {
|
||||
KERNEL_LOG_ERROR("Memset failed, result[%d]", ret);
|
||||
return KERNEL_STATUS_INNER_ERROR;
|
||||
}
|
||||
|
||||
uint64_t single_copy_size = static_cast<uint64_t>(type_size * one_line_col);
|
||||
|
||||
if (swap_cache_idx_size < static_cast<uint64_t>(batch_size)) {
|
||||
KERNEL_LOG_ERROR(
|
||||
"The value of swap_cache_idx_size:[%llu] must be less than "
|
||||
"batch_size:[%lld]",
|
||||
swap_cache_idx_size, batch_size);
|
||||
return KERNEL_STATUS_INNER_ERROR;
|
||||
}
|
||||
|
||||
uint64_t old_value_size = outputs[0]->GetDataSize();
|
||||
uint64_t cache_table_size = inputs[0]->GetDataSize();
|
||||
for (int64_t i = 0; i < batch_size; ++i) {
|
||||
if (swap_cache_idx[i] < 0) {
|
||||
continue;
|
||||
}
|
||||
ret = memcpy_s(old_value + i * single_copy_size, old_value_size, cache_table + swap_cache_idx[i] * single_copy_size,
|
||||
single_copy_size);
|
||||
old_value_size -= single_copy_size;
|
||||
if (ret != EOK) {
|
||||
KERNEL_LOG_ERROR("CacheSwapTable memcpy failed, result [%d].", ret);
|
||||
return KERNEL_STATUS_INNER_ERROR;
|
||||
}
|
||||
ret = memcpy_s(cache_table + swap_cache_idx[i] * single_copy_size, cache_table_size,
|
||||
miss_value + i * single_copy_size, single_copy_size);
|
||||
cache_table_size -= single_copy_size;
|
||||
if (ret != EOK) {
|
||||
KERNEL_LOG_ERROR("CacheSwapTable memcpy failed, result [%d].", ret);
|
||||
return KERNEL_STATUS_INNER_ERROR;
|
||||
}
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
uint32_t CacheSwapTableMsCpuKernel::DoCompute() {
|
||||
std::map<int, std::function<uint32_t(std::vector<Tensor *> &, std::vector<Tensor *> &, int64_t &, int64_t &,
|
||||
int64_t &, int &)>>
|
||||
calls;
|
||||
calls[DT_INT32] = CacheSwapTableTask<int32_t>;
|
||||
calls[DT_INT64] = CacheSwapTableTask<int64_t>;
|
||||
|
||||
if (calls.find(indices_type_) == calls.end()) {
|
||||
KERNEL_LOG_ERROR(
|
||||
"CacheSwapTableMsCpuKernel op doesn't support indices tensor types: "
|
||||
"[%s]",
|
||||
DTypeStr(indices_type_).c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
|
||||
int type_size = GetSizeByDataType(param_type_);
|
||||
return calls[indices_type_](inputs_, outputs_, batch_size_, output_size_, one_line_col_, type_size);
|
||||
}
|
||||
|
||||
uint32_t CacheSwapTableMsCpuKernel::GetInputAndCheck(CpuKernelContext &ctx) {
|
||||
KERNEL_LOG_INFO("GetInputAndCheck start!");
|
||||
// get input Tensors
|
||||
const uint32_t kNumInput = 3;
|
||||
for (uint32_t i = 0; i < kNumInput; ++i) {
|
||||
Tensor *tensor = ctx.Input(i);
|
||||
KERNEL_CHECK_NULLPTR(tensor, KERNEL_STATUS_PARAM_INVALID, "Get input tensor[%d] failed", i)
|
||||
inputs_.push_back(tensor);
|
||||
}
|
||||
// get output Tensors
|
||||
const uint32_t kNumOutput = 1;
|
||||
for (uint32_t i = 0; i < kNumOutput; ++i) {
|
||||
Tensor *tensor = ctx.Output(i);
|
||||
KERNEL_CHECK_NULLPTR(tensor, KERNEL_STATUS_PARAM_INVALID, "Get output tensor[%d] failed", i)
|
||||
outputs_.push_back(tensor);
|
||||
}
|
||||
// get param type
|
||||
param_type_ = static_cast<DataType>(inputs_[0]->GetDataType());
|
||||
indices_type_ = static_cast<DataType>(inputs_[1]->GetDataType());
|
||||
KERNEL_LOG_INFO("GetInputAndCheck success!");
|
||||
|
||||
std::shared_ptr<TensorShape> cache_table_shape = ctx.Input(0)->GetTensorShape();
|
||||
std::shared_ptr<TensorShape> indices_shape = ctx.Input(1)->GetTensorShape();
|
||||
|
||||
for (int32_t i = 1; i < cache_table_shape->GetDims(); ++i) {
|
||||
KERNEL_CHECK_ASSIGN_64S_MULTI(one_line_col_, cache_table_shape->GetDimSize(i), one_line_col_,
|
||||
KERNEL_STATUS_PARAM_INVALID);
|
||||
}
|
||||
for (int32_t i = 0; i < indices_shape->GetDims(); ++i) {
|
||||
KERNEL_CHECK_ASSIGN_64S_MULTI(batch_size_, indices_shape->GetDimSize(i), batch_size_, KERNEL_STATUS_PARAM_INVALID);
|
||||
}
|
||||
output_size_ = batch_size_ * one_line_col_;
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
uint32_t CacheSwapTableMsCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||
uint32_t res = GetInputAndCheck(ctx);
|
||||
if (res != KERNEL_STATUS_OK) {
|
||||
return res;
|
||||
}
|
||||
|
||||
res = DoCompute();
|
||||
if (res != KERNEL_STATUS_OK) {
|
||||
KERNEL_LOG_ERROR("Compute failed");
|
||||
return res;
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
REGISTER_CPU_KERNEL(kCacheSwapTable, CacheSwapTableMsCpuKernel);
|
||||
} // namespace aicpu
|
|
@ -0,0 +1,213 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "extract_glimpse.h"
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include "cpu_kernel_utils.h"
|
||||
#include "utils/eigen_tensor.h"
|
||||
#include "utils/kernel_util.h"
|
||||
using namespace std;
|
||||
random_device rd;
|
||||
mt19937 gen(rd());
|
||||
uniform_real_distribution<float> dis_uniform(0.0f, 255.0f);
|
||||
normal_distribution<float> dis_normal(10, 0.5);
|
||||
#define SHED 2048
|
||||
namespace {
|
||||
const uint32_t kOutputNum = 1;
|
||||
const uint32_t kInputNum = 3;
|
||||
const char *kExtractGlimpse = "ExtractGlimpse";
|
||||
} // namespace
|
||||
namespace aicpu {
|
||||
uint32_t ExtractGlimpseCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "ExtractGlimpse check input and output number failed.");
|
||||
KERNEL_HANDLE_ERROR(ExtractGlimpseCheck(ctx), "ExtractGlimpse check params failed.");
|
||||
Tensor *x = ctx.Input(0);
|
||||
Tensor *ss = ctx.Input(1);
|
||||
Tensor *offsets = ctx.Input(2);
|
||||
Tensor *y = ctx.Output(0);
|
||||
AttrValue *centered = ctx.GetAttr("centered");
|
||||
AttrValue *normalized = ctx.GetAttr("normalized");
|
||||
AttrValue *uniform_noise = ctx.GetAttr("uniform_noise");
|
||||
AttrValue *noise = ctx.GetAttr("noise");
|
||||
float *x_data = (float *)x->GetData();
|
||||
int32_t *ss_data = (int32_t *)ss->GetData();
|
||||
float *offsets_data = (float *)offsets->GetData();
|
||||
float *y_data = (float *)y->GetData();
|
||||
uint64_t offsets_cnt = offsets->GetTensorShape()->GetDimSize(0);
|
||||
uint64_t batch_cnt = x->GetTensorShape()->GetDimSize(0);
|
||||
KERNEL_CHECK_FALSE(offsets_cnt == batch_cnt, KERNEL_STATUS_PARAM_INVALID, "offsets should equal to batches")
|
||||
int64_t image_height = x->GetTensorShape()->GetDimSize(1);
|
||||
int64_t image_width = x->GetTensorShape()->GetDimSize(2);
|
||||
int64_t channels = x->GetTensorShape()->GetDimSize(3);
|
||||
uint64_t g_height = ss_data[0], g_width = ss_data[1];
|
||||
uint64_t size1 = image_width * image_height * channels;
|
||||
uint64_t size2 = image_width * channels;
|
||||
uint64_t size3 = g_height * g_width * channels;
|
||||
uint64_t size4 = size3 / g_height;
|
||||
int64_t g_size = g_width * g_height;
|
||||
if (batch_cnt > SHED) {
|
||||
uint32_t min_core = 1;
|
||||
uint64_t max_core = std::max(min_core, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
|
||||
max_core = min(max_core, (uint64_t)batch_cnt);
|
||||
auto fun = [&](size_t st, size_t ed) {
|
||||
for (auto i = st; i < ed; i++) {
|
||||
float x = offsets_data[i << 1], y = offsets_data[1 + (i << 1)];
|
||||
if (normalized->GetBool()) {
|
||||
x *= image_height;
|
||||
y *= image_width;
|
||||
}
|
||||
if (centered->GetBool()) {
|
||||
x /= 2.0f;
|
||||
y /= 2.0f;
|
||||
x += image_height / 2.0f;
|
||||
y += image_width / 2.0f;
|
||||
}
|
||||
x -= g_height / 2.0f;
|
||||
y -= g_width / 2.0f;
|
||||
for (int64_t v = 0; v < g_size; v++) {
|
||||
int64_t j = v / g_width, k = v % g_width;
|
||||
int64_t a = (int64_t)x + j, b = (int64_t)y + k;
|
||||
uint64_t pos_y = i * size3 + j * size4 + k * channels;
|
||||
if (a < 0 || a >= image_height || b < 0 || b >= image_width) {
|
||||
for (int u = 0; u < channels; u++) {
|
||||
if (uniform_noise->GetBool())
|
||||
y_data[pos_y + u] = dis_uniform(gen);
|
||||
else if (noise->GetString() == "zero")
|
||||
y_data[pos_y + u] = 0.0f;
|
||||
else if (noise->GetString() == "gaussian")
|
||||
y_data[pos_y + u] = max(0.0f, dis_normal(gen));
|
||||
else {
|
||||
KERNEL_LOG_ERROR("noise type [%s] unsupported.", noise->GetString().c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
uint64_t pos_x = i * size1 + a * size2 + b * channels;
|
||||
for (int u = 0; u < channels; u++) {
|
||||
y_data[pos_y + u] = x_data[pos_x + u];
|
||||
}
|
||||
}
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
};
|
||||
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, batch_cnt, batch_cnt / max_core, fun),
|
||||
"ExtractGlimpse Compute failed.");
|
||||
} else {
|
||||
for (uint64_t i = 0; i < batch_cnt; i++) {
|
||||
float x = offsets_data[i << 1], y = offsets_data[1 + (i << 1)];
|
||||
if (normalized->GetBool()) {
|
||||
x *= image_height;
|
||||
y *= image_width;
|
||||
}
|
||||
if (centered->GetBool()) {
|
||||
x /= 2.0f;
|
||||
y /= 2.0f;
|
||||
x += image_height / 2.0f;
|
||||
y += image_width / 2.0f;
|
||||
}
|
||||
x -= g_height / 2.0f;
|
||||
y -= g_width / 2.0f;
|
||||
if (g_size < SHED) {
|
||||
for (int64_t v = 0; v < g_size; v++) {
|
||||
int64_t j = v / g_width, k = v % g_width;
|
||||
int64_t a = (int64_t)x + j, b = (int64_t)y + k;
|
||||
uint64_t pos_y = i * size3 + j * size4 + k * channels;
|
||||
if (a < 0 || a >= image_height || b < 0 || b >= image_width) {
|
||||
for (int u = 0; u < channels; u++) {
|
||||
if (uniform_noise->GetBool())
|
||||
y_data[pos_y + u] = dis_uniform(gen);
|
||||
else if (noise->GetString() == "zero")
|
||||
y_data[pos_y + u] = 0.0f;
|
||||
else if (noise->GetString() == "gaussian")
|
||||
y_data[pos_y + u] = max(0.0f, dis_normal(gen));
|
||||
else {
|
||||
KERNEL_LOG_ERROR("noise type [%s] unsupported.", noise->GetString().c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
uint64_t pos_x = i * size1 + a * size2 + b * channels;
|
||||
for (int u = 0; u < channels; u++) {
|
||||
y_data[pos_y + u] = x_data[pos_x + u];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uint32_t min_core = 1;
|
||||
uint64_t max_core = std::max(min_core, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
|
||||
max_core = min(max_core, (uint64_t)g_size);
|
||||
auto fun = [&](size_t st, size_t ed) {
|
||||
for (auto v = st; v < ed; v++) {
|
||||
int64_t j = v / g_width, k = v % g_width;
|
||||
int64_t a = (int64_t)x + j, b = (int64_t)y + k;
|
||||
uint64_t pos_y = i * size3 + j * size4 + k * channels;
|
||||
if (a < 0 || a >= image_height || b < 0 || b >= image_width) {
|
||||
for (int u = 0; u < channels; u++)
|
||||
if (uniform_noise->GetBool())
|
||||
y_data[pos_y + u] = dis_uniform(gen);
|
||||
else if (noise->GetString() == "zero")
|
||||
y_data[pos_y + u] = 0.0f;
|
||||
else if (noise->GetString() == "gaussian")
|
||||
y_data[pos_y + u] = max(0.0f, dis_normal(gen));
|
||||
else {
|
||||
KERNEL_LOG_ERROR("noise type [%s] unsupported.", noise->GetString().c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
uint64_t pos_x = i * size1 + a * size2 + b * channels;
|
||||
for (int u = 0; u < channels; u++) {
|
||||
y_data[pos_y + u] = x_data[pos_x + u];
|
||||
}
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
};
|
||||
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, g_size, g_size / max_core, fun),
|
||||
"ExtractGlimpse Compute failed.");
|
||||
}
|
||||
}
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
uint32_t ExtractGlimpseCpuKernel::ExtractGlimpseCheck(CpuKernelContext &ctx) {
|
||||
Tensor *x = ctx.Input(0);
|
||||
Tensor *ss = ctx.Input(1);
|
||||
Tensor *offsets = ctx.Input(2);
|
||||
Tensor *y = ctx.Output(0);
|
||||
AttrValue *centered = ctx.GetAttr("centered");
|
||||
AttrValue *normalized = ctx.GetAttr("normalized");
|
||||
AttrValue *uniform_noise = ctx.GetAttr("uniform_noise");
|
||||
AttrValue *noise = ctx.GetAttr("noise");
|
||||
KERNEL_CHECK_NULLPTR(x, KERNEL_STATUS_PARAM_INVALID, "Get input 0 failed.")
|
||||
KERNEL_CHECK_NULLPTR(ss, KERNEL_STATUS_PARAM_INVALID, "Get input 1 failed.")
|
||||
KERNEL_CHECK_NULLPTR(offsets, KERNEL_STATUS_PARAM_INVALID, "Get input 2 failed.")
|
||||
KERNEL_CHECK_NULLPTR(y, KERNEL_STATUS_PARAM_INVALID, "Get output 0 failed.")
|
||||
KERNEL_CHECK_NULLPTR(centered, KERNEL_STATUS_PARAM_INVALID, "Get attribute centered failed.")
|
||||
KERNEL_CHECK_NULLPTR(normalized, KERNEL_STATUS_PARAM_INVALID, "Get attribute normalized failed.")
|
||||
KERNEL_CHECK_NULLPTR(uniform_noise, KERNEL_STATUS_PARAM_INVALID, "Get attribute uniform_noise failed.")
|
||||
KERNEL_CHECK_NULLPTR(noise, KERNEL_STATUS_PARAM_INVALID, "Get attribute noise failed.")
|
||||
KERNEL_CHECK_NULLPTR(x->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input 0 data failed.")
|
||||
KERNEL_CHECK_NULLPTR(ss->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input 1 data failed.")
|
||||
KERNEL_CHECK_NULLPTR(offsets->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get input 2 data failed.")
|
||||
KERNEL_CHECK_NULLPTR(y->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output 0 data failed.")
|
||||
KERNEL_CHECK_FALSE(x->GetDataType() == DT_FLOAT && ss->GetDataType() == DT_INT32 &&
|
||||
offsets->GetDataType() == DT_FLOAT && y->GetDataType() == DT_FLOAT,
|
||||
KERNEL_STATUS_PARAM_INVALID, "data type error.")
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
REGISTER_CPU_KERNEL(kExtractGlimpse, ExtractGlimpseCpuKernel);
|
||||
} // namespace aicpu
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_IMPL_EXTRACT_GLIMPSE_H_
|
||||
#define AICPU_IMPL_EXTRACT_GLIMPSE_H_
|
||||
|
||||
#include "cpu_ops_kernel.h"
|
||||
|
||||
namespace aicpu {
|
||||
class ExtractGlimpseCpuKernel : public CpuKernel {
|
||||
public:
|
||||
ExtractGlimpseCpuKernel() = default;
|
||||
~ExtractGlimpseCpuKernel() override = default;
|
||||
|
||||
protected:
|
||||
uint32_t Compute(CpuKernelContext &ctx) override;
|
||||
|
||||
private:
|
||||
static uint32_t ExtractGlimpseCheck(CpuKernelContext &ctx);
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif
|
|
@ -124,27 +124,6 @@ double FFTWithSizeCpuKernel::Getnormalized(int64_t n, std::string normalized, bo
|
|||
if (normalized == "backward") result = 1.0 / n;
|
||||
if (normalized == "ortho") result = 1.0 / sqrt((double)n);
|
||||
}
|
||||
// if (signal_ndim == 1) {
|
||||
// result = sqrt((double)out_shape[out_shape.size() - 1]);
|
||||
// } else if (signal_ndim == 2) {
|
||||
// result = sqrt((double)(out_shape[out_shape.size() - 1] *
|
||||
// out_shape[out_shape.size() - 2]));
|
||||
// } else {
|
||||
// result = sqrt((double)(out_shape[out_shape.size() - 1] *
|
||||
// out_shape[out_shape.size() - 2] *
|
||||
// out_shape[out_shape.size() - 3]));
|
||||
// }
|
||||
// if (is_reverse) {
|
||||
// if (result == 0) {
|
||||
// KERNEL_LOG_ERROR("DivideByZeroExcepiton");
|
||||
// }
|
||||
// result = 1.0 / result;
|
||||
// }
|
||||
// KERNEL_LOG_DEBUG(
|
||||
// "FFTWithSizeCpuKernel[GetNormalized], "
|
||||
// "input_shape[%s] normalize[%s]. "
|
||||
// "is_reverse: [%d]. norm_:[%lf]",
|
||||
// VectorToString(out_shape).c_str(), normalized, is_reverse, result);
|
||||
std::cout << "result = " << result << std::endl;
|
||||
return result;
|
||||
}
|
||||
|
@ -350,14 +329,7 @@ uint32_t FFTWithSizeCpuKernel::FFTWithSizeCompute(CpuKernelContext &ctx, bool on
|
|||
if (is_real) {
|
||||
inverse = real_inverse;
|
||||
}
|
||||
std::cout << out;
|
||||
std::cout << "===========";
|
||||
// if
|
||||
// std::vector<int64_t> out_shape(out.dimensions().begin(),
|
||||
// out.dimensions().end());
|
||||
// if (is_real && !inverse) {
|
||||
// out_shape.back() = x_shape.back();
|
||||
// }
|
||||
|
||||
std::cout << out;
|
||||
auto cout = x_shape_ptr->NumElements();
|
||||
auto norm = Getnormalized(cout, normalized, inverse);
|
||||
|
|
|
@ -1,18 +1,3 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_KERNELS_NORMALIZED_FFTWITHSIZE_H_
|
||||
#define AICPU_KERNELS_NORMALIZED_FFTWITHSIZE_H_
|
||||
|
||||
|
@ -37,4 +22,4 @@ class FFTWithSizeCpuKernel : public CpuKernel {
|
|||
static double Getnormalized(int64_t n, std::string normalized, bool is_reverse);
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -15,129 +15,160 @@
|
|||
*/
|
||||
|
||||
#include "fill.h"
|
||||
#include "cpu_kernel_utils.h"
|
||||
#include "utils/eigen_tensor.h"
|
||||
#include "utils/kernel_util.h"
|
||||
|
||||
namespace {
|
||||
const char *const kFill = "Fill";
|
||||
}
|
||||
const uint32_t kOutputNum = 1;
|
||||
const uint32_t kInputNum = 2;
|
||||
const char *kFill = "Fill";
|
||||
const char *kFillV2 = "FillV2";
|
||||
const int64_t kParallelDataNumCriticalPoint1 = 128 * 1024;
|
||||
const int64_t kParallelDataNumCriticalPoint2 = 2 * 1024 * 1024;
|
||||
|
||||
#define CALCULATE_DIMS_DTYPE_CASE(DTYPE, TYPE) \
|
||||
case (DTYPE): { \
|
||||
if (CalculateDims<TYPE>(dims_tensor, dims) != KERNEL_STATUS_OK) { \
|
||||
KERNEL_LOG_ERROR("Fill kernel calculate dims failed."); \
|
||||
return KERNEL_STATUS_PARAM_INVALID; \
|
||||
} \
|
||||
break; \
|
||||
}
|
||||
|
||||
#define FILL_GENERATE_DTYPE_CASE(DTYPE, TYPE) \
|
||||
case (DTYPE): { \
|
||||
FillOutput<TYPE>(ctx, value_tensor, output); \
|
||||
break; \
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace aicpu {
|
||||
template <typename T>
|
||||
void FillGenerateCase(Tensor *&value_tensor, Tensor *&output) {
|
||||
auto value = *(reinterpret_cast<T *>(value_tensor->GetData()));
|
||||
if (AddrAlignedCheck(output->GetData())) {
|
||||
Eigen::TensorMap<Eigen::Tensor<T, 1>, Eigen::Aligned> eigen_output(static_cast<T *>(output->GetData()),
|
||||
output->GetTensorShape()->NumElements());
|
||||
eigen_output.setConstant(value);
|
||||
} else {
|
||||
Eigen::TensorMap<Eigen::Tensor<T, 1>, Eigen::Unaligned> eigen_output(static_cast<T *>(output->GetData()),
|
||||
output->GetTensorShape()->NumElements());
|
||||
eigen_output.setConstant(value);
|
||||
}
|
||||
}
|
||||
uint32_t FillCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||
// 校验输入个数和输出个数,以及输入和输入tensor的属性是否为空
|
||||
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "Check input and output number failed.");
|
||||
|
||||
uint32_t FillCpuKernel::GetDimsByType(CpuKernelContext &ctx) {
|
||||
dims.clear();
|
||||
std::vector<int64_t> dims;
|
||||
Tensor *dims_tensor = ctx.Input(0);
|
||||
KERNEL_CHECK_NULLPTR(dims_tensor, KERNEL_STATUS_PARAM_INVALID, "Get dims input failed")
|
||||
uint32_t ret;
|
||||
auto dims_dtype = dims_tensor->GetDataType();
|
||||
switch (dims_dtype) {
|
||||
case (DT_INT32):
|
||||
ret = CalcDims<int32_t>(dims_tensor, dims);
|
||||
break;
|
||||
case (DT_INT64):
|
||||
ret = CalcDims<int64_t>(dims_tensor, dims);
|
||||
break;
|
||||
CALCULATE_DIMS_DTYPE_CASE(DT_INT32, int32_t)
|
||||
CALCULATE_DIMS_DTYPE_CASE(DT_INT64, int64_t)
|
||||
default:
|
||||
KERNEL_LOG_ERROR(
|
||||
"Fill kernel dims data_type [%u] not support, support data_types: "
|
||||
"DT_INT32, DT_INT64",
|
||||
dims_dtype);
|
||||
KERNEL_LOG_ERROR("Fill kernel dims data_type [%u] not support, support data_types: DT_INT32, DT_INT64.",
|
||||
dims_dtype);
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
if (ret != KERNEL_STATUS_OK) {
|
||||
KERNEL_LOG_ERROR("Fill kernel calculate dims failed");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t FillCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||
uint32_t check = GetDimsByType(ctx);
|
||||
if (check != KERNEL_STATUS_OK) {
|
||||
return check;
|
||||
}
|
||||
Tensor *value_tensor = ctx.Input(1);
|
||||
KERNEL_CHECK_NULLPTR(value_tensor, KERNEL_STATUS_PARAM_INVALID, "Get value input failed")
|
||||
KERNEL_CHECK_NULLPTR(value_tensor->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get value input data failed")
|
||||
KERNEL_CHECK_NULLPTR(value_tensor->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get value input shape failed")
|
||||
if (!value_tensor->GetTensorShape()->GetDimSizes().empty()) {
|
||||
if (value_tensor->NumElements() != 1) {
|
||||
KERNEL_LOG_ERROR("Fill kernel value input is not a scalar.");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
|
||||
Tensor *output = ctx.Output(0);
|
||||
KERNEL_CHECK_NULLPTR(output, KERNEL_STATUS_PARAM_INVALID, "Get output failed")
|
||||
KERNEL_CHECK_NULLPTR(output->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get output data failed")
|
||||
KERNEL_CHECK_NULLPTR(output->GetTensorShape(), KERNEL_STATUS_PARAM_INVALID, "Get output shape failed")
|
||||
if (output->GetTensorShape()->GetDimSizes() != dims) {
|
||||
if (output->GetTensorShape()->GetDims() != static_cast<int64_t>(dims.size())) {
|
||||
KERNEL_LOG_ERROR("Fill kernel output shape not matched.");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
if (output->GetTensorShape()->GetDimSizes() != dims) {
|
||||
output->GetTensorShape()->SetDimSizes(dims);
|
||||
}
|
||||
|
||||
auto input_dtype = value_tensor->GetDataType();
|
||||
auto output_dtype = output->GetDataType();
|
||||
if (input_dtype != output_dtype) {
|
||||
KERNEL_LOG_ERROR("Fill kernel data type not matched, value input dtype [%u], output dtype [%u].", input_dtype,
|
||||
output_dtype);
|
||||
KERNEL_LOG_ERROR(
|
||||
"Fill kernel data type not matched, value input dtype [%u], output dtype [%u], support data_types: "
|
||||
"DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, DT_INT32, DT_INT64, DT_INT8, DT_UINT16, "
|
||||
"DT_UINT32, DT_UINT64, DT_UINT8, DT_BOOL.",
|
||||
input_dtype, output_dtype);
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
|
||||
std::map<int, std::function<void(Tensor *&, Tensor *&)>> calls;
|
||||
calls[DT_INT8] = FillGenerateCase<int8_t>;
|
||||
calls[DT_UINT8] = FillGenerateCase<uint8_t>;
|
||||
calls[DT_INT16] = FillGenerateCase<int16_t>;
|
||||
calls[DT_UINT16] = FillGenerateCase<uint16_t>;
|
||||
calls[DT_INT32] = FillGenerateCase<int32_t>;
|
||||
calls[DT_UINT32] = FillGenerateCase<uint32_t>;
|
||||
calls[DT_INT64] = FillGenerateCase<int64_t>;
|
||||
calls[DT_UINT64] = FillGenerateCase<uint64_t>;
|
||||
calls[DT_BOOL] = FillGenerateCase<bool>;
|
||||
calls[DT_FLOAT16] = FillGenerateCase<Eigen::half>;
|
||||
calls[DT_FLOAT] = FillGenerateCase<float>;
|
||||
calls[DT_DOUBLE] = FillGenerateCase<double>;
|
||||
|
||||
if (calls.find(output_dtype) == calls.end()) {
|
||||
KERNEL_LOG_ERROR("Fill kernel data type [%u] not support", output_dtype);
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
switch (output_dtype) {
|
||||
FILL_GENERATE_DTYPE_CASE(DT_INT8, int8_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_UINT8, uint8_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_INT16, int16_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_UINT16, uint16_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_INT32, int32_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_UINT32, uint32_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_INT64, int64_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_UINT64, uint64_t)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_BOOL, bool)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_FLOAT16, Eigen::half)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_FLOAT, float)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_DOUBLE, double)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_COMPLEX64, std::complex<float>)
|
||||
FILL_GENERATE_DTYPE_CASE(DT_COMPLEX128, std::complex<double>)
|
||||
default:
|
||||
KERNEL_LOG_ERROR(
|
||||
"Fill kernel data type [%u] not support, not support data_types: DT_STRING, DT_DUAL_SUB_INT8, "
|
||||
"DT_DUAL_SUB_UINT8, DT_QUINT8, DT_QINT8, DT_QINT32, DT_QINT16, DT_QUINT16, DT_RESOURCE, DT_STRING_REF, "
|
||||
"DT_DUAL, DT_UNDEFINED.",
|
||||
output_dtype);
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
calls[output_dtype](value_tensor, output);
|
||||
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint32_t FillCpuKernel::CalcDims(const Tensor *dims_tensor, std::vector<int64_t> &dim_vec) {
|
||||
uint32_t FillCpuKernel::CalculateDims(const Tensor *dims_tensor, std::vector<int64_t> &dims) {
|
||||
// 获取第一个输入tensor中的元素个数,第一个输入是一个一维的tensor(dims_tensor)
|
||||
uint64_t data_num = dims_tensor->GetDataSize() / sizeof(T);
|
||||
if (data_num == 0) {
|
||||
KERNEL_LOG_INFO("Fill kernel: dims is empty, fill scalar output.");
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
auto dims_data = reinterpret_cast<const T *>(dims_tensor->GetData());
|
||||
|
||||
KERNEL_CHECK_NULLPTR(dims_tensor->GetData(), KERNEL_STATUS_PARAM_INVALID, "Get dims data failed")
|
||||
for (uint64_t i = 0; i < data_num; i++) {
|
||||
auto dim = *(reinterpret_cast<const T *>(dims_tensor->GetData()) + i);
|
||||
auto dim = *(dims_data + i);
|
||||
if (dim < 0) {
|
||||
KERNEL_LOG_ERROR("Fill kernel: input dim [%llu] is negative, value=[%lld]", i, static_cast<int64_t>(dim));
|
||||
KERNEL_LOG_ERROR("dims input dim [%llu] is negative, value=[%lld].", i, static_cast<int64_t>(dim));
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
// zero dim is different from empty dim.
|
||||
if (dim == 0) {
|
||||
KERNEL_LOG_INFO("Fill kernel: input dim [%llu] is zero", i);
|
||||
KERNEL_LOG_INFO("dims input dim [%llu] is zero.", i);
|
||||
dims.clear();
|
||||
break;
|
||||
}
|
||||
dim_vec.emplace_back(dim);
|
||||
dims.emplace_back(dim);
|
||||
}
|
||||
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void FillCpuKernel::FillOutput(CpuKernelContext &ctx, const Tensor *value_tensor, Tensor *output) {
|
||||
auto value = reinterpret_cast<T *>(value_tensor->GetData());
|
||||
auto output_data = reinterpret_cast<T *>(output->GetData());
|
||||
int64_t data_num = output->NumElements();
|
||||
|
||||
if (data_num >= kParallelDataNumCriticalPoint1) {
|
||||
uint32_t min_core_num = 1;
|
||||
uint32_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx));
|
||||
|
||||
if (data_num <= kParallelDataNumCriticalPoint2) {
|
||||
max_core_num = std::min(max_core_num, 4U);
|
||||
}
|
||||
|
||||
if (max_core_num > data_num) {
|
||||
max_core_num = data_num;
|
||||
}
|
||||
|
||||
auto shared_fill = [&](int64_t start, int64_t end) { SpecialFillOutput<T>(start, end, output_data, value); };
|
||||
|
||||
CpuKernelUtils::ParallelFor(ctx, data_num, data_num / max_core_num, shared_fill);
|
||||
} else {
|
||||
SpecialFillOutput<T>(0, data_num, output_data, value);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void FillCpuKernel::SpecialFillOutput(int64_t start, int64_t end, T *output_data, const T *value) {
|
||||
for (int64_t i = start; i < end; i++) {
|
||||
*(output_data + i) = *(value);
|
||||
}
|
||||
}
|
||||
|
||||
REGISTER_CPU_KERNEL(kFill, FillCpuKernel);
|
||||
REGISTER_CPU_KERNEL(kFillV2, FillCpuKernel);
|
||||
} // namespace aicpu
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_KERNELS_NORMALIZED_FILL_H
|
||||
#define AICPU_KERNELS_NORMALIZED_FILL_H
|
||||
#ifndef AICPU_KERNELS_NORMALIZED_FILL_H_
|
||||
#define AICPU_KERNELS_NORMALIZED_FILL_H_
|
||||
|
||||
#include "cpu_ops_kernel.h"
|
||||
|
||||
|
@ -23,21 +23,18 @@ namespace aicpu {
|
|||
class FillCpuKernel : public CpuKernel {
|
||||
public:
|
||||
FillCpuKernel() = default;
|
||||
~FillCpuKernel() override = default;
|
||||
~FillCpuKernel() = default;
|
||||
uint32_t Compute(CpuKernelContext &ctx) override;
|
||||
|
||||
private:
|
||||
uint32_t GetDimsByType(CpuKernelContext &ctx);
|
||||
/**
|
||||
* @brief calc dims from input dims tensor
|
||||
* @param dims_tensor input dims tensor
|
||||
* @param dims output shape dims
|
||||
* @return status if success
|
||||
*/
|
||||
template <typename T>
|
||||
uint32_t CalcDims(const Tensor *dims_tensor, std::vector<int64_t> &dims);
|
||||
uint32_t CalculateDims(const Tensor *dims_tensor, std::vector<int64_t> &dims);
|
||||
|
||||
std::vector<int64_t> dims;
|
||||
template <typename T>
|
||||
void FillOutput(CpuKernelContext &ctx, const Tensor *value_tensor, Tensor *output);
|
||||
|
||||
template <typename T>
|
||||
void SpecialFillOutput(int64_t start, int64_t end, T *output_data, const T *value);
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_KERNELS_NORMALIZED_FILL_H_
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
/**
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "log_normal_reverse.h"
|
||||
#include <random>
|
||||
#include <set>
|
||||
#include "cpu_kernel_utils.h"
|
||||
#include "cpu_ops_kernel.h"
|
||||
#include "utils/eigen_tensor.h"
|
||||
#include "utils/kernel_util.h"
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
|
||||
#include "Eigen/Core"
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
namespace {
|
||||
const uint32_t kNumInput = 1;
|
||||
const uint32_t kNumOutput = 1;
|
||||
|
||||
const char *kLogNormalReverse = "LogNormalReverse";
|
||||
const int64_t kParallelDataNumSameShape = 16 * 1024;
|
||||
const int64_t kParallelDataNumMid = 128 * 1024;
|
||||
} // namespace
|
||||
namespace aicpu {
|
||||
uint32_t LogNormalReverseCpuKernel::GetInputAndCheck(CpuKernelContext &ctx) {
|
||||
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kNumInput, kNumOutput), "LogNormalReverse check input and output failed.");
|
||||
// get and check input
|
||||
Tensor *input = ctx.Input(0);
|
||||
inputs_.push_back(input);
|
||||
|
||||
// get output Tensors
|
||||
Tensor *output = ctx.Output(0);
|
||||
outputs_.push_back(output);
|
||||
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint32_t LogNormalReverseCpuKernel::DoCompute(CpuKernelContext &ctx) {
|
||||
float input_mean = 1.0;
|
||||
float input_std = 2.0;
|
||||
|
||||
auto mean_value = ctx.GetAttr("mean");
|
||||
auto std_value = ctx.GetAttr("std");
|
||||
|
||||
if (mean_value != nullptr) {
|
||||
input_mean = mean_value->GetFloat();
|
||||
}
|
||||
if (std_value != nullptr) {
|
||||
input_std = std_value->GetFloat();
|
||||
}
|
||||
|
||||
T *output_y = reinterpret_cast<T *>(outputs_[0]->GetData());
|
||||
|
||||
static default_random_engine random_engine(time(0));
|
||||
static std::normal_distribution<float> normal_value(input_mean, input_std);
|
||||
|
||||
int64_t Nums = inputs_[0]->GetTensorShape()->NumElements();
|
||||
|
||||
int64_t data_num = Nums;
|
||||
if (data_num >= kParallelDataNumSameShape) {
|
||||
uint32_t max_core_num = std::max(1U, aicpu::CpuKernelUtils::GetCPUNum(ctx) - kResvCpuNum);
|
||||
|
||||
if (data_num <= kParallelDataNumMid) {
|
||||
max_core_num = std::min(max_core_num, 4U);
|
||||
}
|
||||
if (max_core_num > data_num) {
|
||||
max_core_num = data_num;
|
||||
}
|
||||
|
||||
auto shared_lognormalreverse = [&](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
output_y[i] = static_cast<T>(std::exp(normal_value(random_engine)));
|
||||
}
|
||||
};
|
||||
|
||||
if (max_core_num == 0) {
|
||||
max_core_num = 1;
|
||||
}
|
||||
CpuKernelUtils::ParallelFor(ctx, data_num, data_num / max_core_num, shared_lognormalreverse);
|
||||
} else {
|
||||
for (int64_t i = 0; i < Nums; i++) {
|
||||
output_y[i] = static_cast<T>(std::exp(normal_value(random_engine)));
|
||||
}
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
uint32_t LogNormalReverseCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||
uint32_t res = GetInputAndCheck(ctx);
|
||||
if (res != KERNEL_STATUS_OK) {
|
||||
return res;
|
||||
}
|
||||
|
||||
DataType input_type{ctx.Input(0)->GetDataType()};
|
||||
switch (input_type) {
|
||||
case (DT_FLOAT16): {
|
||||
DoCompute<Eigen::half>(ctx);
|
||||
break;
|
||||
}
|
||||
case (DT_FLOAT): {
|
||||
DoCompute<float>(ctx);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
KERNEL_LOG_ERROR("[%s] Data type of input is not support, input data type is [%s].", ctx.GetOpType().c_str(),
|
||||
DTypeStr(input_type).c_str());
|
||||
res = KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
if (res != KERNEL_STATUS_OK) {
|
||||
KERNEL_LOG_ERROR("log normal reverse failed");
|
||||
return res;
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
REGISTER_CPU_KERNEL(kLogNormalReverse, LogNormalReverseCpuKernel);
|
||||
} // namespace aicpu
|
|
@ -1,44 +1,38 @@
|
|||
/**
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_KERNELS_NORMALIZED_CACHE_SWAP_TABLE_H
|
||||
#define AICPU_KERNELS_NORMALIZED_CACHE_SWAP_TABLE_H
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include "cpu_ops_kernel.h"
|
||||
|
||||
namespace aicpu {
|
||||
class CacheSwapTableMsCpuKernel : public CpuKernel {
|
||||
public:
|
||||
~CacheSwapTableMsCpuKernel() = default;
|
||||
uint32_t Compute(CpuKernelContext &ctx) override;
|
||||
|
||||
private:
|
||||
uint32_t DoCompute();
|
||||
|
||||
uint32_t GetInputAndCheck(CpuKernelContext &ctx);
|
||||
|
||||
int64_t batch_size_ = 1;
|
||||
int64_t one_line_col_ = 1;
|
||||
int64_t output_size_ = 1;
|
||||
|
||||
std::vector<Tensor *> inputs_;
|
||||
std::vector<Tensor *> outputs_;
|
||||
DataType param_type_ = DT_FLOAT;
|
||||
DataType indices_type_ = DT_INT32;
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef AICPU_KERNELS_NORMALIZED_LOGNORMALREVERSE_H_
|
||||
#define AICPU_KERNELS_NORMALIZED_LOGNORMALREVERSE_H_
|
||||
|
||||
#include "cpu_ops_kernel.h"
|
||||
|
||||
namespace aicpu {
|
||||
class LogNormalReverseCpuKernel : public CpuKernel {
|
||||
public:
|
||||
LogNormalReverseCpuKernel() = default;
|
||||
~LogNormalReverseCpuKernel() override = default;
|
||||
uint32_t Compute(CpuKernelContext &ctx) override;
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
uint32_t DoCompute(CpuKernelContext &ctx);
|
||||
uint32_t GetInputAndCheck(CpuKernelContext &ctx);
|
||||
|
||||
std::vector<Tensor *> inputs_;
|
||||
std::vector<Tensor *> outputs_;
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif // AICPU_KERNELS_NORMALIZED_LOGNORMALREVERSE_H_
|
|
@ -21,7 +21,6 @@
|
|||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <unsupported/Eigen/CXX11/Tensor>
|
||||
#include "kernel_util.h"
|
||||
#include "utils/kernel_util.h"
|
||||
#define NoneN 1000
|
||||
using namespace Eigen;
|
||||
|
|
|
@ -436,4 +436,4 @@ uint32_t ResizeBicubicGradCpuKernel::Compute(CpuKernelContext &ctx) {
|
|||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
REGISTER_CPU_KERNEL(kResizeBicubicGrad, ResizeBicubicGradCpuKernel);
|
||||
} // namespace aicpu
|
||||
} // namespace aicpu
|
||||
|
|
|
@ -35,4 +35,4 @@ class ResizeBicubicGradCpuKernel : public CpuKernel {
|
|||
uint32_t GetInputAndCheck(CpuKernelContext &ctx);
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,217 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "segment_max.h"
|
||||
|
||||
#include "cpu_kernel_utils.h"
|
||||
#include "utils/eigen_tensor.h"
|
||||
#include "utils/kernel_util.h"
|
||||
#include "cpu_kernel/common/runtime_tensor_desc.h"
|
||||
|
||||
namespace {
|
||||
const uint32_t kInputNum = 2;
|
||||
const uint32_t kOutputNum = 1;
|
||||
const char *kSegmentMax = "SegmentMax";
|
||||
const int64_t kDataSize = 2 * 1024;
|
||||
|
||||
#define SEGMENTMAX_COMPUTE_CASE(DTYPE, TYPE1, TYPE2, CTX) \
|
||||
case (DTYPE): { \
|
||||
uint32_t result = SegmentMaxCompute<TYPE1, TYPE2>(CTX); \
|
||||
if (result != KERNEL_STATUS_OK) { \
|
||||
KERNEL_LOG_ERROR("SegmentMax kernel compute failed."); \
|
||||
return result; \
|
||||
} \
|
||||
break; \
|
||||
}
|
||||
|
||||
#define SEGMENTMAX_COMPUTE_CASE_ALL(TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_INT8, int8_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_INT16, int16_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_INT32, int32_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_INT64, int64_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_UINT8, uint8_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_UINT16, uint16_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_UINT32, uint32_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_UINT64, uint64_t, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_FLOAT16, Eigen::half, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_FLOAT, float, TYPE, CTX) \
|
||||
SEGMENTMAX_COMPUTE_CASE(DT_DOUBLE, double, TYPE, CTX)
|
||||
} // namespace
|
||||
|
||||
namespace aicpu {
|
||||
uint32_t SegmentMaxCpuKernel::Compute(CpuKernelContext &ctx) {
|
||||
KERNEL_HANDLE_ERROR(NormalCheck(ctx, kInputNum, kOutputNum), "SegmentMax check input and output number failed.");
|
||||
auto data_type = ctx.Input(0)->GetDataType();
|
||||
auto segment_ids_type = ctx.Input(1)->GetDataType();
|
||||
switch (segment_ids_type) {
|
||||
case DT_INT32: {
|
||||
switch (data_type) {
|
||||
SEGMENTMAX_COMPUTE_CASE_ALL(int32_t, ctx)
|
||||
default:
|
||||
KERNEL_LOG_ERROR("Input[0] data type[%s] not supported.", DTypeStr(data_type).c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DT_INT64: {
|
||||
switch (data_type) {
|
||||
SEGMENTMAX_COMPUTE_CASE_ALL(int64_t, ctx)
|
||||
default:
|
||||
KERNEL_LOG_ERROR("Input[0] data type[%s] not supported.", DTypeStr(data_type).c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
KERNEL_LOG_ERROR("Input[1] data type[%s] not supported.", DTypeStr(segment_ids_type).c_str());
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
uint32_t SegmentMaxCpuKernel::SegmentMaxCompute(CpuKernelContext &ctx) {
|
||||
Tensor *input_x_data = ctx.Input(0);
|
||||
auto input_x_addr = reinterpret_cast<T1 *>(input_x_data->GetData());
|
||||
auto input_x_shape = input_x_data->GetTensorShape();
|
||||
auto input_x_dims = input_x_shape->GetDimSizes();
|
||||
int64_t input_x_num = input_x_data->NumElements();
|
||||
Tensor *segment_ids_data = ctx.Input(1);
|
||||
auto segment_ids_data_addr = reinterpret_cast<T2 *>(segment_ids_data->GetData());
|
||||
int64_t segment_ids_data_num = segment_ids_data->NumElements();
|
||||
input_x_dims[0] = segment_ids_data_addr[segment_ids_data_num - 1] + 1;
|
||||
Tensor *output_data = ctx.Output(0);
|
||||
auto output_data_addr = reinterpret_cast<T1 *>(output_data->GetData());
|
||||
auto output_data_shape = output_data->GetTensorShape();
|
||||
if (output_data_shape->GetDimSize(0) < input_x_dims[0]) {
|
||||
KERNEL_LOG_ERROR("The number of segments of the segmentation result of segment_ids is too large.");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
output_data_shape->SetDimSizes(input_x_dims);
|
||||
if (!output_data->SetTensorShape(output_data_shape.get())) {
|
||||
KERNEL_LOG_ERROR("Set output shape failed.");
|
||||
return KERNEL_STATUS_INNER_ERROR;
|
||||
}
|
||||
int64_t output_data_num = output_data->NumElements();
|
||||
for (int64_t i = 0; i < output_data_num; i++) {
|
||||
output_data_addr[i] = static_cast<T1>(0);
|
||||
}
|
||||
std::vector<int64_t> segments_segment_ids;
|
||||
if (segment_ids_data_num != (input_x_data->GetTensorShape()->GetDimSize(0))) {
|
||||
KERNEL_LOG_ERROR("The amount of data for input[1] must be equal to the first dimension of input[0].");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
if (segment_ids_data_addr[0] < 0) {
|
||||
KERNEL_LOG_ERROR("Input[1] must be nonnegative data.");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
int64_t seg_tmp = 1;
|
||||
for (int64_t i = 0; i < segment_ids_data_num - 1; i++) {
|
||||
if (segment_ids_data_addr[i] > segment_ids_data_addr[i + 1]) {
|
||||
KERNEL_LOG_ERROR("Input[1] must be an ascending ordered sequence.");
|
||||
return KERNEL_STATUS_PARAM_INVALID;
|
||||
}
|
||||
if (segment_ids_data_addr[i] == segment_ids_data_addr[i + 1]) {
|
||||
seg_tmp++;
|
||||
} else {
|
||||
segments_segment_ids.push_back(seg_tmp);
|
||||
seg_tmp = 1;
|
||||
}
|
||||
if (i == segment_ids_data_num - ge::DIM_SIZE2) {
|
||||
segments_segment_ids.push_back(seg_tmp);
|
||||
}
|
||||
}
|
||||
const int64_t num_compare_per = input_x_num / (input_x_shape->GetDimSize(0));
|
||||
const int64_t num_segments_segment_ids = segments_segment_ids.size();
|
||||
if (num_segments_segment_ids < kDataSize) {
|
||||
for (int64_t i = 0; i < num_segments_segment_ids; i++) {
|
||||
int64_t count = segments_segment_ids[i];
|
||||
int64_t count_no = 0;
|
||||
for (int64_t j = 0; j < i; j++) {
|
||||
count_no += segments_segment_ids[j];
|
||||
}
|
||||
int64_t input_addr_base = count_no * num_compare_per;
|
||||
if (num_compare_per < kDataSize) {
|
||||
for (int64_t j = 0; j < num_compare_per; j++) {
|
||||
int64_t max_init_addr = input_addr_base + j;
|
||||
T1 max_value = input_x_addr[max_init_addr];
|
||||
for (int64_t k = 1; k < count; k++) {
|
||||
int cmp_addr = max_init_addr + k * num_compare_per;
|
||||
if (max_value < input_x_addr[cmp_addr]) {
|
||||
max_value = input_x_addr[cmp_addr];
|
||||
}
|
||||
}
|
||||
output_data_addr[segment_ids_data_addr[count_no] * num_compare_per + j] = max_value;
|
||||
}
|
||||
} else {
|
||||
uint32_t min_core_num = 1;
|
||||
int64_t max_core_num = std::max(min_core_num, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
|
||||
if (max_core_num > num_compare_per) {
|
||||
max_core_num = num_compare_per;
|
||||
}
|
||||
auto shard_compute = [&](size_t start, size_t end) {
|
||||
for (size_t j = start; j < end; j++) {
|
||||
int64_t max_init_addr = input_addr_base + j;
|
||||
T1 max_value = input_x_addr[max_init_addr];
|
||||
for (int64_t k = 1; k < count; k++) {
|
||||
int cmp_addr = max_init_addr + k * num_compare_per;
|
||||
if (max_value < input_x_addr[cmp_addr]) {
|
||||
max_value = input_x_addr[cmp_addr];
|
||||
}
|
||||
}
|
||||
output_data_addr[segment_ids_data_addr[count_no] * num_compare_per + j] = max_value;
|
||||
}
|
||||
};
|
||||
KERNEL_HANDLE_ERROR(
|
||||
CpuKernelUtils::ParallelFor(ctx, num_compare_per, num_compare_per / max_core_num, shard_compute),
|
||||
"SegmentMax Compute failed.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uint32_t min_core_num_seg = 1;
|
||||
int64_t max_core_num_seg = std::max(min_core_num_seg, aicpu::CpuKernelUtils::GetCPUNum(ctx) - 2);
|
||||
if (max_core_num_seg > num_segments_segment_ids) {
|
||||
max_core_num_seg = num_segments_segment_ids;
|
||||
}
|
||||
auto shard_compute_seg = [&](size_t start_seg, size_t end_seg) {
|
||||
for (size_t i = start_seg; i < end_seg; i++) {
|
||||
int64_t count = segments_segment_ids[i];
|
||||
int64_t count_no = 0;
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
count_no += segments_segment_ids[j];
|
||||
}
|
||||
int64_t input_addr_base = count_no * num_compare_per;
|
||||
for (int64_t j = 0; j < num_compare_per; j++) {
|
||||
int64_t max_init_addr = input_addr_base + j;
|
||||
T1 max_value = input_x_addr[max_init_addr];
|
||||
for (int64_t k = 1; k < count; k++) {
|
||||
int cmp_addr = max_init_addr + k * num_compare_per;
|
||||
if (max_value < input_x_addr[cmp_addr]) {
|
||||
max_value = input_x_addr[cmp_addr];
|
||||
}
|
||||
}
|
||||
output_data_addr[segment_ids_data_addr[count_no] * num_compare_per + j] = max_value;
|
||||
}
|
||||
}
|
||||
};
|
||||
KERNEL_HANDLE_ERROR(CpuKernelUtils::ParallelFor(ctx, num_segments_segment_ids,
|
||||
num_segments_segment_ids / max_core_num_seg, shard_compute_seg),
|
||||
"SegmentMax Compute failed.");
|
||||
}
|
||||
return KERNEL_STATUS_OK;
|
||||
}
|
||||
REGISTER_CPU_KERNEL(kSegmentMax, SegmentMaxCpuKernel);
|
||||
} // namespace aicpu
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef AICPU_KERNELS_NORMALIZED_SEGMENTMAX_H_
|
||||
#define AICPU_KERNELS_NORMALIZED_SEGMENTMAX_H_
|
||||
|
||||
#include "cpu_ops_kernel.h"
|
||||
|
||||
namespace aicpu {
|
||||
class SegmentMaxCpuKernel : public CpuKernel {
|
||||
public:
|
||||
SegmentMaxCpuKernel() = default;
|
||||
~SegmentMaxCpuKernel() override = default;
|
||||
|
||||
protected:
|
||||
uint32_t Compute(CpuKernelContext &ctx) override;
|
||||
|
||||
private:
|
||||
template <typename T1, typename T2>
|
||||
static uint32_t SegmentMaxCompute(CpuKernelContext &ctx);
|
||||
};
|
||||
} // namespace aicpu
|
||||
#endif
|
|
@ -54,7 +54,6 @@ const AnfNodePtr AICpuLibSelectPass::Process(const FuncGraphPtr &graph, const An
|
|||
static const std::set<std::string> kMigrateAicpuKernelOps = {mindspore::kAdaptiveAvgPool2DV1OpName,
|
||||
mindspore::kAdaptiveAvgPool2DGradV1OpName,
|
||||
mindspore::kBucketizeOpName,
|
||||
mindspore::kCacheSwapTableOpName,
|
||||
mindspore::kCauchyOpName,
|
||||
mindspore::kChannelShuffleOpName,
|
||||
mindspore::kCholeskyOpName,
|
||||
|
@ -252,7 +251,10 @@ const AnfNodePtr AICpuLibSelectPass::Process(const FuncGraphPtr &graph, const An
|
|||
mindspore::kLogicalXorOpName,
|
||||
mindspore::kLogNormalReverseOpName,
|
||||
mindspore::kBetaincOpName,
|
||||
mindspore::kLessEqualOpName};
|
||||
mindspore::kLessEqualOpName,
|
||||
mindspore::kHSVToRGBOpName,
|
||||
mindspore::kLuSolveOpName,
|
||||
mindspore::kExtractGlimpseOpName};
|
||||
|
||||
static const std::string kEnvOpSoNames = "mindspore_aicpu_kernels";
|
||||
static const std::string kCpuKernelSoName = "mindspore_cpu_kernels";
|
||||
|
|
|
@ -1160,7 +1160,7 @@ class PoissonNLLLoss(LossBase):
|
|||
Args:
|
||||
log_input (bool, optional): Whether use log input. Default: True.
|
||||
full (bool, optional): Whether include the Stirling approximation term in the loss calculation. Default: False.
|
||||
eps (float, optional): Lower bound of `x` when calculating logarithms. Default: 1e-8.
|
||||
eps (float, optional): Lower bound of `x` when calculating logarithms. Default: 1e-08.
|
||||
reduction (str, optional): Apply specific reduction method to the output:
|
||||
'none', 'mean', 'sum'. Default: 'mean'.
|
||||
|
||||
|
|
|
@ -350,3 +350,5 @@ from .hsv_to_rgb import _hsv_to_rgb_aicpu
|
|||
from .im2col import _im2col_aicpu
|
||||
from .lu_solve import _lu_solve_aicpu
|
||||
from .relu_grad_v3 import _relu_grad_v3_aicpu
|
||||
from .resize_bicubic import _resize_bicubic_aicpu
|
||||
from .extract_glimpse import _extract_glimpse_aicpu
|
||||
|
|
|
@ -86,7 +86,7 @@ from .math_ops import (Abs, ACos, Asin, Asinh, AddN, AccumulateNV2, AssignAdd, A
|
|||
MatrixLogarithm, MatrixPower, MatrixSolve, MatrixTriangularSolve, ReduceStd, STFT,
|
||||
NextAfter, Orgqr, Qr, RaggedRange, Digamma, Eig, EuclideanNorm, CompareAndBitpack, ComplexAbs,
|
||||
CumulativeLogsumexp, Gcd, Trace, TridiagonalMatMul, TrilIndices, TriuIndices, Zeta,
|
||||
Roll, Lgamma, Logit)
|
||||
Roll, Lgamma, Logit, MatrixSolveLs)
|
||||
from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSparseLazyAdam, AdamNoUpdateParam,
|
||||
ApplyMomentum, BatchNorm, BiasAdd, Conv2D, Conv3D, Conv2DTranspose, Conv3DTranspose,
|
||||
DepthwiseConv2dNative,
|
||||
|
@ -647,7 +647,8 @@ __all__ = [
|
|||
"SparseSlice",
|
||||
"ResizeLinear1D",
|
||||
"ResizeBicubic",
|
||||
"Logit"
|
||||
"Logit",
|
||||
"MatrixSolveLs"
|
||||
]
|
||||
|
||||
__custom__ = [
|
||||
|
|
|
@ -465,10 +465,10 @@ class NonMaxSuppressionWithOverlaps(Primitive):
|
|||
|
||||
Examples:
|
||||
>>> overlaps = Tensor(np.array([[0.6964692, 0.28613934, 0.22685145, 0.5513148],
|
||||
[0.71946895, 0.42310646, 0.9807642, 0.6848297],
|
||||
[0.4809319, 0.39211753, 0.343178, 0.7290497],
|
||||
[0.43857226, 0.059677895, 0.39804426, 0.7379954]
|
||||
]), mstype.float32)
|
||||
... [0.71946895, 0.42310646, 0.9807642, 0.6848297],
|
||||
... [0.4809319, 0.39211753, 0.343178, 0.7290497],
|
||||
... [0.43857226, 0.059677895, 0.39804426, 0.7379954]
|
||||
... ]), mstype.float32)
|
||||
>>> scores = Tensor(np.array([0.18249173, 0.17545176, 0.53155136, 0.53182757]), mstype.float32)
|
||||
>>> max_output_size = Tensor(4, mstype.int32)
|
||||
>>> overlap_threshold = Tensor(0.1, mstype.float32)
|
||||
|
|
|
@ -260,6 +260,7 @@ class Addcdiv(Primitive):
|
|||
|
||||
Raises:
|
||||
TypeError: If dtype of `x1`, `x2`, `value`, `input_data` is not tensor.
|
||||
TypeError: If dtype of `x1`, `x2`, `value`, `input_data` are not the same.
|
||||
ValueError: If `x1` could not be broadcast to `x2`.
|
||||
ValueError: If `value` could not be broadcast to `x1/x2`.
|
||||
ValueError: If `input_data` could not be broadcast to `value*(x1/x2)`.
|
||||
|
@ -303,9 +304,7 @@ class Addcmul(Primitive):
|
|||
|
||||
Raises:
|
||||
TypeError: If dtype of `x1`, `x2`, `value`, `input_data` is not tensor.
|
||||
TypeError: If dtype of `input_data` is not one of: float32, float16, int32.
|
||||
TypeError: If dtype of `x1` or `x2` is not one of: float32, float16, int32.
|
||||
TypeError: If dtype of `value` is not one of: float32, float16, int32.
|
||||
TypeError: If dtype of `x1`, `x2`, `value`, `input_data` are not the same.
|
||||
ValueError: If `x1` could not be broadcast to `x2`.
|
||||
ValueError: If `value` could not be broadcast to `x1` * `x2`.
|
||||
ValueError: If `input_data` could not be broadcast to `value*(x1*x2)`.
|
||||
|
|
Loading…
Reference in New Issue