add exp fp16 kernel

parent d24c508bf0
commit ce62249782
nnacl/exp_parameter.h
@@ -0,0 +1,33 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_NNACL_EXP_PARAMETER_H_
#define MINDSPORE_NNACL_EXP_PARAMETER_H_

#include "nnacl/op_base.h"

typedef struct ExpParameter {
  // Primitive parameter
  OpParameter op_parameter_;
  float base_;
  float scale_;
  float shift_;
  // other parameter
  float in_scale_;
  float out_scale_;
  int element_num_;
} ExpParameter;

#endif  // MINDSPORE_NNACL_EXP_PARAMETER_H_
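The fields above encode the ExpFusion primitive y = base^(scale*x + shift). The CPU kernels below fold the primitive fields into in_scale_ = scale_ * ln(base_) and out_scale_ = base_^shift_ (base_ == -1 is the sentinel for natural base e), so the hot loop only ever computes exp(in_scale_ * x) * out_scale_. A minimal scalar reference of that folding, mirroring ExpCPUKernel::Init further down (names here are illustrative, not from the commit):

    #include <cmath>

    // Scalar reference for the fused op.
    static float ExpFusionRef(float x, float base, float scale, float shift) {
      float log_base = (base == -1.0f) ? 1.0f : std::log(base);  // -1 means natural e
      float in_scale = scale * log_base;                         // scale_ * ln(base_)
      float out_scale = (shift == 0.0f)
                            ? 1.0f
                            : (log_base == 1.0f ? std::exp(shift)        // e^shift
                                                : std::pow(base, shift));  // base^shift
      return std::exp(x * in_scale) * out_scale;  // == base^(scale*x + shift)
    }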
nnacl/fp16/exp_fp16.c
@@ -13,12 +13,22 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp16/exp_fp16.h"
#include <math.h>
#include <string.h>
#include "nnacl/errorcode.h"

#if defined(ENABLE_NEON)
static inline void simd_exp_fp16(float16x8_t input, float16_t *dst) {
  static float16x8_t maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f};
  static float16x8_t minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f};
  input = vmaxq_f16(minv, vminq_f16(input, maxv));
  float32x4_t input_low = vcvt_f32_f16(vget_low_f16(input));
  float32x4_t input_high = vcvt_f32_f16(vget_high_f16(input));
  vst1q_f16(dst, vcombine_f16(vcvt_f16_f32(VexpFp32(input_low)), vcvt_f16_f32(VexpFp32(input_high))));
}
#endif

void ExpFp16(const float16_t *src, float16_t *dst, int num) {
  int i = 0;
#ifdef ENABLE_NEON
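The fp16 path clamps to [-88, 88], widens each half of the vector to float32, exponentiates there via VexpFp32, and narrows back, because float16 has far too little range for exp and even float32 overflows just past 88. A quick standalone check of that bound (an illustrative aside, not part of the commit):

    #include <cmath>
    #include <cstdio>

    int main() {
      std::printf("exp(88.0f)  = %e\n", std::exp(88.0f));   // ~1.65e+38, still finite
      std::printf("exp(89.0f)  = %e\n", std::exp(89.0f));   // inf: exceeds FLT_MAX ~3.40e+38
      std::printf("exp(-88.0f) = %e\n", std::exp(-88.0f));  // ~6.05e-39, subnormal
      return 0;
    }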
@@ -31,3 +41,42 @@ void ExpFp16(const float16_t *src, float16_t *dst, int num) {
    single_exp_fp16(src[i], dst + i);
  }
}

int ExpFusionFp16(const float16_t *src, float16_t *dst, const ExpParameter *param, int task_id) {
  int stride = UP_DIV(param->element_num_, param->op_parameter_.thread_num_);
  int start = stride * task_id;
  int end = MSMIN(param->element_num_, start + stride);
  int num = end - start;

  if (param->scale_ == 1) {
    ExpFp16(src + start, dst + start, num);
  } else {
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT16X8 scale = MS_MOVQ_F16(param->in_scale_);
    int count = (num / C8NUM) * C8NUM;
    for (; i < count; i += C8NUM) {
      simd_exp_fp16(MS_MULQ_F16(MS_LDQ_F16(src + i), scale), dst + i);
    }
#endif
    for (; i < num; ++i) {
      single_exp_fp16(src[i] * param->in_scale_, dst + i);
    }
  }
  if (param->out_scale_ != 1) {
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT16X8 scale = MS_MOVQ_F16(param->out_scale_);
    int count = (num / C8NUM) * C8NUM;
    for (; i < count; i += C8NUM) {
      simd_exp_fp16(MS_LDQ_F16(src + i), dst + i);
      MS_STQ_F16(dst + i, MS_MULQ_F16(MS_LDQ_F16(dst + i), scale));
    }
#endif
    for (; i < num; ++i) {
      single_exp_fp16(src[i], dst + i);
      dst[i] *= param->out_scale_;
    }
  }
  return NNACL_OK;
}
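ExpFusionFp16 (and its fp32 twin further down) hands each task one contiguous chunk instead of the strided i += thread_num_ walk that the removed Exp() below used, which keeps every task's loads and stores sequential. A small sketch of the partition arithmetic, assuming UP_DIV(a, b) == (a + b - 1) / b and MSMIN == minimum:

    #include <cstdio>

    int main() {
      const int element_num = 10, thread_num = 3;
      const int stride = (element_num + thread_num - 1) / thread_num;  // UP_DIV -> 4
      for (int task_id = 0; task_id < thread_num; ++task_id) {
        const int start = stride * task_id;
        const int end = (element_num < start + stride) ? element_num : start + stride;  // MSMIN
        std::printf("task %d covers [%d, %d), %d elements\n", task_id, start, end, end - start);
      }
      return 0;  // prints [0, 4), [4, 8), [8, 10)
    }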
nnacl/fp16/exp_fp16.h
@@ -13,46 +13,19 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_NNACL_FP16_EXP_H_
#define MINDSPORE_NNACL_FP16_EXP_H_
#ifndef MINDSPORE_NNACL_FP16_EXP_FP16_H_
#define MINDSPORE_NNACL_FP16_EXP_FP16_H_

#include "nnacl/op_base.h"
#include "nnacl/exp_parameter.h"
#include "nnacl/fp32/exp_fp32.h"
#include "nnacl/intrinsics/ms_simd_instructions_fp16.h"

#ifdef __cplusplus
extern "C" {
#endif
void ExpFp16(const float16_t *src, float16_t *dst, int num);

#if defined(ENABLE_NEON)
static inline float32x4_t exp_fp32(float32x4_t input) {
  static float32x4_t param[] = {{0.693147f, 0.693147f, 0.693147f, 0.693147f},
                                {1.0f / 120, 1.0f / 120, 1.0f / 120, 1.0f / 120},
                                {1.0f / 24, 1.0f / 24, 1.0f / 24, 1.0f / 24},
                                {1.0f / 6, 1.0f / 6, 1.0f / 6, 1.0f / 6},
                                {0.5f, 0.5f, 0.5f, 0.5f},
                                {1.0f, 1.0f, 1.0f, 1.0f}};
  int32x4_t integer = vcvtq_s32_f32(input / param[0]);
  float32x4_t decimal = input - vcvtq_f32_s32(integer) * param[0];
  int32x4_t int_exp = vshlq_s32((integer + vmovq_n_s32(127)), vmovq_n_s32(23));
  float32x4_t decimal_exp =
    param[5] +
    decimal * (param[5] + decimal * (param[4] + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
  decimal_exp = decimal_exp * vld1q_f32((float *)(&int_exp));
  return decimal_exp;
}

static inline void simd_exp_fp16(float16x8_t input, float16_t *dst) {
  static float16x8_t maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f};
  static float16x8_t minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f};

  input = vmaxq_f16(minv, vminq_f16(input, maxv));
  float32x4_t input_low = vcvt_f32_f16(vget_low_f16(input));
  float32x4_t input_high = vcvt_f32_f16(vget_high_f16(input));
  vst1q_f16(dst, vcombine_f16(vcvt_f16_f32(exp_fp32(input_low)), vcvt_f16_f32(exp_fp32(input_high))));
}
#endif
int ExpFusionFp16(const float16_t *src, float16_t *dst, const ExpParameter *param, int task_id);

static inline void single_exp_fp16(float16_t src, float16_t *dst) {
  static float param[] = {0.693147f, 1.0f / 120, 1.0f / 24, 1.0f / 6, 1.0f / 2, 1.0f};
@@ -64,8 +37,9 @@ static inline void single_exp_fp16(float16_t src, float16_t *dst) {
    1.0f + decimal * (1.0f + decimal * (0.5f + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
  *dst = (float16_t)(*((float *)&int_exp) * decimal_exp);
}

#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_NNACL_FP16_EXP_H_
#endif  // MINDSPORE_NNACL_FP16_EXP_FP16_H_
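single_exp_fp16 above and the fp32 VexpFp32 below share one approximation: split x = k*ln2 + r, build 2^k by writing k + 127 directly into the float32 exponent field, and evaluate e^r with a fifth-order Taylor polynomial. A scalar sketch of the idea (illustrative only; the kernels vectorize it and rely on the earlier clamp to [-88, 88] to keep k + 127 inside the exponent's range):

    #include <cstdint>
    #include <cstring>

    static float ExpApproxSketch(float x) {
      const float kLn2 = 0.693147f;
      int k = static_cast<int>(x / kLn2);          // integer part, truncated like vcvtq_s32_f32
      float r = x - static_cast<float>(k) * kLn2;  // remainder, |r| < ln2
      uint32_t bits = static_cast<uint32_t>(k + 127) << 23;  // float32 2^k from exponent bits
      float pow2k;
      std::memcpy(&pow2k, &bits, sizeof(pow2k));
      // 1 + r(1 + r(1/2 + r(1/6 + r(1/24 + r/120)))): same coefficients as param[]
      float er = 1.0f + r * (1.0f + r * (0.5f + r * (1.0f / 6 + r * (1.0f / 24 + r * (1.0f / 120)))));
      return pow2k * er;
    }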
nnacl/fp32/exp_fp32.c
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -19,27 +19,6 @@
#include <string.h>
#include "nnacl/errorcode.h"

int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) {
  if (parameter->thread_num_ == 0) {
    return NNACL_PARAM_INVALID;
  }
  if (parameter->scale_ == 1) {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = expf(input_data[i]);
    }
  } else {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = expf(input_data[i] * parameter->in_scale_);
    }
  }
  if (parameter->out_scale_ != 1) {
    for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
      output_data[i] = output_data[i] * parameter->out_scale_;
    }
  }
  return NNACL_OK;
}

void ExpFp32(const float *src, float *dst, int num) {
  int i = 0;
#ifdef ENABLE_ARM64
@@ -52,3 +31,42 @@ void ExpFp32(const float *src, float *dst, int num) {
    single_exp(src[i], dst + i);
  }
}

int ExpFusionFp32(const float *src, float *dst, const ExpParameter *param, int task_id) {
  int stride = UP_DIV(param->element_num_, param->op_parameter_.thread_num_);
  int start = stride * task_id;
  int end = MSMIN(param->element_num_, start + stride);
  int num = end - start;

  if (param->scale_ == 1) {
    ExpFp32(src + start, dst + start, num);
  } else {
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT32X4 scale = MS_MOVQ_F32(param->in_scale_);
    int count = (num / C4NUM) * C4NUM;
    for (; i < count; i += C4NUM) {
      simd_exp(MS_MULQ_F32(MS_LDQ_F32(src + i), scale), dst + i);
    }
#endif
    for (; i < num; ++i) {
      single_exp(src[i] * param->in_scale_, dst + i);
    }
  }
  if (param->out_scale_ != 1) {
    int i = 0;
#ifdef ENABLE_ARM64
    MS_FLOAT32X4 scale = {param->out_scale_, param->out_scale_, param->out_scale_, param->out_scale_};
    int count = (num / C4NUM) * C4NUM;
    for (; i < count; i += C4NUM) {
      simd_exp(MS_LDQ_F32(src + i), dst + i);
      MS_STQ_F32(dst + i, MS_MULQ_F32(MS_LDQ_F32(dst + i), scale));
    }
#endif
    for (; i < num; ++i) {
      single_exp(src[i], dst + i);
      dst[i] *= param->out_scale_;
    }
  }
  return NNACL_OK;
}
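A hypothetical single-threaded call into the new fused entry point, with field meanings as in the ExpParameter header above (in real use, the kernel's Init computes in_scale_/out_scale_ and ParallelLaunch issues one call per task_id):

    #include "nnacl/fp32/exp_fp32.h"

    void RunExpFusionOnce() {
      float input[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
      float output[8];
      ExpParameter param = {};              // zero-init; illustrative, not from the commit
      param.op_parameter_.thread_num_ = 1;  // one task covers all elements
      param.element_num_ = 8;
      param.scale_ = 1.0f;                  // takes the plain ExpFp32 fast path
      param.in_scale_ = 1.0f;               // scale_ * ln(base_), normally set by Init
      param.out_scale_ = 1.0f;              // base_ ^ shift_, normally set by Init
      ExpFusionFp32(input, output, &param, /*task_id=*/0);  // output[i] ~= expf(input[i])
    }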
nnacl/fp32/exp_fp32.h
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,48 +18,40 @@
#define MINDSPORE_NNACL_FP32_EXP_H_

#include "nnacl/op_base.h"
#include "nnacl/exp_parameter.h"
#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
#include "nnacl/intrinsics/ms_simd_instructions.h"
#endif

typedef struct ExpParameter {
  // Primitive parameter
  OpParameter op_parameter_;
  float base_;
  float scale_;
  float shift_;
  // other parameter
  int thread_num_;
  float in_scale_;
  float out_scale_;
  int element_num_;
} ExpParameter;

#ifdef __cplusplus
extern "C" {
#endif
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id);

void ExpFp32(const float *src, float *dst, int num);
int ExpFusionFp32(const float *src, float *dst, const ExpParameter *param, int task_id);

#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
static inline void simd_exp(MS_FLOAT32X4 input, float *dst) {
  static MS_FLOAT32X4 maxv = {88.0f, 88.0f, 88.0f, 88.0f};
  static MS_FLOAT32X4 minv = {-88.0f, -88.0f, -88.0f, -88.0f};
static inline MS_FLOAT32X4 VexpFp32(MS_FLOAT32X4 input) {
  static MS_FLOAT32X4 param[] = {{0.693147f, 0.693147f, 0.693147f, 0.693147f},
                                 {1.0f / 120, 1.0f / 120, 1.0f / 120, 1.0f / 120},
                                 {1.0f / 24, 1.0f / 24, 1.0f / 24, 1.0f / 24},
                                 {1.0f / 6, 1.0f / 6, 1.0f / 6, 1.0f / 6},
                                 {0.5f, 0.5f, 0.5f, 0.5f},
                                 {1.0f, 1.0f, 1.0f, 1.0f}};

  input = MS_MAXQ_F32(minv, MS_MINQ_F32(input, maxv));
  MS_INT32X4 integer = MS_CVTQPS_EPI32(MS_DIVQ_F32(input, param[0]));
  MS_FLOAT32X4 decimal = MS_SUBQ_F32(input, MS_MULQ_F32(MS_CVTQEPI32_PS(integer), param[0]));
  MS_INT32X4 int_exp = MS_SLLIQ_EPI32(MS_ADDQ_EPI32(integer, MS_MOVQ_EPI32(127)), 23);
  MS_FLOAT32X4 tmp = MS_MULQ_F32(decimal, (MS_ADDQ_F32(param[2], MS_MULQ_F32(decimal, param[1]))));
  tmp = MS_MULQ_F32(decimal, MS_ADDQ_F32(param[4], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[3], tmp))));
  MS_FLOAT32X4 decimal_exp = MS_ADDQ_F32(param[5], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[5], tmp)));
  MS_STQ_F32(dst, MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp)));
  return MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp));
}

static inline void simd_exp(MS_FLOAT32X4 input, float *dst) {
  static MS_FLOAT32X4 maxv = {88.0f, 88.0f, 88.0f, 88.0f};
  static MS_FLOAT32X4 minv = {-88.0f, -88.0f, -88.0f, -88.0f};
  input = MS_MAXQ_F32(minv, MS_MINQ_F32(input, maxv));
  MS_STQ_F32(dst, VexpFp32(input));
}
#endif

@@ -24,7 +24,6 @@ using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::lite::micro::nnacl {
int ExpFP32Coder::Prepare(CoderContext *context) {
  exp_parameter_ = reinterpret_cast<ExpParameter *>(parameter_);
  exp_parameter_->thread_num_ = exp_parameter_->op_parameter_.thread_num_;
  float log_ = (exp_parameter_->base_ == -1) ? 1 : logf(exp_parameter_->base_);
  exp_parameter_->in_scale_ = exp_parameter_->scale_ * log_;
  if (exp_parameter_->shift_ == 0) {
@@ -129,8 +129,8 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const SpliceParameter

void NNaclFp32Serializer::CodeStruct(const std::string &name, const ExpParameter &exp_parameter) {
  CodeBaseStruct("ExpParameter", name, exp_parameter.op_parameter_, exp_parameter.base_, exp_parameter.scale_,
                 exp_parameter.shift_, exp_parameter.thread_num_, exp_parameter.in_scale_, exp_parameter.out_scale_,
                 exp_parameter.element_num_);
                 exp_parameter.shift_, exp_parameter.op_parameter_.thread_num_, exp_parameter.in_scale_,
                 exp_parameter.out_scale_, exp_parameter.element_num_);
}

void NNaclFp32Serializer::CodeStruct(const std::string &name, const StridedSliceParameter &strided_slice_parameter) {
src/runtime/kernel/arm/fp16/exp_fp16.cc
@@ -0,0 +1,34 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp16/exp_fp16.h"
#include "nnacl/fp16/exp_fp16.h"
#include "include/errorcode.h"
#include "src/kernel_registry.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_ExpFusion;

namespace mindspore::kernel {

int ExpFp16CPUKernel::DoExcute(int task_id) {
  ExpFusionFp16(reinterpret_cast<float16_t *>(input_addr_), reinterpret_cast<float16_t *>(output_addr_), param_,
                task_id);
  return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ExpFusion, LiteKernelCreator<ExpFp16CPUKernel>)
}  // namespace mindspore::kernel
src/runtime/kernel/arm/fp16/exp_fp16.h
@@ -0,0 +1,34 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_

#include <vector>
#include "src/runtime/kernel/arm/fp32/exp_fp32.h"

namespace mindspore::kernel {
class ExpFp16CPUKernel : public ExpCPUKernel {
 public:
  explicit ExpFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : ExpCPUKernel(parameter, inputs, outputs, ctx) {}
  ~ExpFp16CPUKernel() override{};

  int DoExcute(int task_id) override;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
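The fp16 kernel is deliberately thin: it inherits Init, ReSize, and Run from ExpCPUKernel (whose buffers become typeless void * in this commit for exactly this reason) and overrides only the per-task body, so one base class drives ParallelLaunch for both data types. A stripped-down sketch of that pattern, with placeholder names rather than the project's classes:

    struct ExpKernelBase {
      virtual ~ExpKernelBase() = default;
      virtual int DoTask(int task_id) { /* fp32 loop over this task's chunk */ return 0; }
      int Run() {
        // the real kernel hands DoTask to ParallelLaunch, one invocation per task_id
        return DoTask(0);
      }
    };

    struct ExpKernelFp16 : ExpKernelBase {
      int DoTask(int task_id) override { /* fp16 loop over the same chunk */ return 0; }
    };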
src/runtime/kernel/arm/fp32/exp_fp32.cc
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp32/exp_fp32.h"
#include <cmath>
#include "include/errorcode.h"
@@ -26,34 +25,31 @@ using mindspore::schema::PrimitiveType_ExpFusion;

namespace mindspore::kernel {
int ExpCPUKernel::Init() {
  exp_parameter_ = reinterpret_cast<ExpParameter *>(op_parameter_);
  exp_parameter_->thread_num_ = thread_count_;

  float log_base = (param_->base_ == -1) ? 1 : logf(param_->base_);
  param_->in_scale_ = param_->scale_ * log_base;
  if (param_->shift_ == 0) {
    param_->out_scale_ = 1;
  } else {
    if (log_base == 1) {
      param_->out_scale_ = expf(param_->shift_);
    } else {
      param_->out_scale_ = powf(param_->base_, param_->shift_);
    }
  }
  param_->op_parameter_.thread_num_ = ms_context_->thread_num_;
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

int ExpCPUKernel::ReSize() {
  exp_parameter_->thread_num_ = thread_count_;
  float log_ = (exp_parameter_->base_ == -1) ? 1 : logf(exp_parameter_->base_);
  exp_parameter_->in_scale_ = exp_parameter_->scale_ * log_;
  if (exp_parameter_->shift_ == 0) {
    exp_parameter_->out_scale_ = 1;
  } else {
    if (log_ == 1) {
      exp_parameter_->out_scale_ = expf(exp_parameter_->shift_);
    } else {
      exp_parameter_->out_scale_ = powf(exp_parameter_->base_, exp_parameter_->shift_);
    }
  }
  param_->element_num_ = in_tensors_.front()->ElementsNum();
  return RET_OK;
}

int ExpCPUKernel::DoExcute(int task_id) {
  Exp(input_addr_, output_addr_, exp_parameter_, task_id);
  ExpFusionFp32(reinterpret_cast<float *>(input_addr_), reinterpret_cast<float *>(output_addr_), param_, task_id);
  return RET_OK;
}
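As a concrete check of the folding Init performs: with base_ = 2, scale_ = 3, shift_ = 1 it stores in_scale_ = 3 * ln 2 ≈ 2.0794 and out_scale_ = 2^1 = 2, and exp(2.0794 * x) * 2 does equal 2^(3x + 1); with the sentinel base_ = -1 (natural e) the log term is taken as 1 and out_scale_ = e^shift_.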
@@ -68,11 +64,10 @@ int ExpRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}

int ExpCPUKernel::Run() {
  input_addr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
  exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();
  input_addr_ = in_tensors_.front()->MutableData();
  output_addr_ = out_tensors_.front()->MutableData();

  auto ret = ParallelLaunch(this->ms_context_, ExpRun, this, exp_parameter_->thread_num_);
  auto ret = ParallelLaunch(this->ms_context_, ExpRun, this, ms_context_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
    return RET_ERROR;
src/runtime/kernel/arm/fp32/exp_fp32.h
@@ -1,5 +1,5 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXP_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXP_H_

@@ -26,22 +25,20 @@ class ExpCPUKernel : public InnerKernel {
 public:
  explicit ExpCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {}
      : InnerKernel(parameter, inputs, outputs, ctx) {
    param_ = reinterpret_cast<ExpParameter *>(parameter);
  }
  ~ExpCPUKernel() override{};

  int Init() override;
  int ReSize() override;
  int Run() override;
  int DoExcute(int task_id);
  virtual int DoExcute(int task_id);

 protected:
  const lite::InnerContext *ctx_ = nullptr;
  int thread_count_ = 1;
  ExpParameter *exp_parameter_ = nullptr;

 private:
  float *input_addr_ = nullptr;
  float *output_addr_ = nullptr;
  ExpParameter *param_ = nullptr;
  void *input_addr_ = nullptr;
  void *output_addr_ = nullptr;
};
}  // namespace mindspore::kernel