add exp fp16 kernel

sunsuodong 2021-07-13 16:06:17 +08:00
parent d24c508bf0
commit ce62249782
11 changed files with 239 additions and 114 deletions

View File

@@ -0,0 +1,33 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_NNACL_EXP_PARAMETER_H_
#define MINDSPORE_NNACL_EXP_PARAMETER_H_
#include "nnacl/op_base.h"
typedef struct ExpParameter {
// Primitive parameter
OpParameter op_parameter_;
float base_;
float scale_;
float shift_;
// other parameter
float in_scale_;
float out_scale_;
int element_num_;
} ExpParameter;
#endif // MINDSPORE_NNACL_EXP_PARAMETER_H_
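
For context, ExpFusion evaluates y = base^(scale * x + shift). The two derived fields above fold that into a single exponential, matching how the kernel's Init() computes them later in this diff (base_ == -1 is the sentinel for the natural base e):

$$y=\mathrm{base}^{\mathrm{scale}\cdot x+\mathrm{shift}}=\underbrace{\mathrm{base}^{\mathrm{shift}}}_{out\_scale\_}\cdot\exp\big(\underbrace{\mathrm{scale}\cdot\ln(\mathrm{base})}_{in\_scale\_}\cdot x\big)$$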

View File

@@ -13,12 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp16/exp_fp16.h"
#include <math.h>
#include <string.h>
#include "nnacl/errorcode.h"
#if defined(ENABLE_NEON)
static inline void simd_exp_fp16(float16x8_t input, float16_t *dst) {
static float16x8_t maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f};
static float16x8_t minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f};
input = vmaxq_f16(minv, vminq_f16(input, maxv));
float32x4_t input_low = vcvt_f32_f16(vget_low_f16(input));
float32x4_t input_high = vcvt_f32_f16(vget_high_f16(input));
vst1q_f16(dst, vcombine_f16(vcvt_f16_f32(VexpFp32(input_low)), vcvt_f16_f32(VexpFp32(input_high))));
}
#endif
void ExpFp16(const float16_t *src, float16_t *dst, int num) {
int i = 0;
#ifdef ENABLE_NEON
@@ -31,3 +41,42 @@ void ExpFp16(const float16_t *src, float16_t *dst, int num) {
single_exp_fp16(src[i], dst + i);
}
}
int ExpFusionFp16(const float16_t *src, float16_t *dst, const ExpParameter *param, int task_id) {
if (param->op_parameter_.thread_num_ == 0) {
return NNACL_PARAM_INVALID;
}
int stride = UP_DIV(param->element_num_, param->op_parameter_.thread_num_);
int start = stride * task_id;
int end = MSMIN(param->element_num_, start + stride);
int num = end - start;
if (param->scale_ == 1) {
ExpFp16(src + start, dst + start, num);
} else {
int i = 0;
#ifdef ENABLE_ARM64
MS_FLOAT16X8 scale = MS_MOVQ_F16(param->in_scale_);
int count = (num / C8NUM) * C8NUM;
for (; i < count; i += C8NUM) {
simd_exp_fp16(MS_MULQ_F16(MS_LDQ_F16(src + start + i), scale), dst + start + i);
}
#endif
for (; i < num; ++i) {
single_exp_fp16(src[start + i] * param->in_scale_, dst + start + i);
}
}
if (param->out_scale_ != 1) {
// dst already holds the exp result, so only the output scaling remains.
int i = 0;
#ifdef ENABLE_ARM64
MS_FLOAT16X8 scale = MS_MOVQ_F16(param->out_scale_);
int count = (num / C8NUM) * C8NUM;
for (; i < count; i += C8NUM) {
MS_STQ_F16(dst + start + i, MS_MULQ_F16(MS_LDQ_F16(dst + start + i), scale));
}
#endif
for (; i < num; ++i) {
dst[start + i] *= param->out_scale_;
}
}
return NNACL_OK;
}
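
ExpFusionFp16 carves the flat element range into per-thread slices with a ceiling-divide stride. A minimal standalone sketch of that partitioning (UP_DIV and MSMIN are redefined locally for illustration; nnacl provides them via op_base.h):

#include <stdio.h>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y)) /* ceiling division */
#define MSMIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  int element_num = 100, thread_num = 4;
  int stride = UP_DIV(element_num, thread_num); /* 25 */
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    int start = stride * task_id;
    int end = MSMIN(element_num, start + stride);
    /* prints task 0: [0, 25) ... task 3: [75, 100); with 101 elements the
       stride becomes 26 and the last task gets the short tail [78, 101). */
    printf("task %d handles [%d, %d)\n", task_id, start, end);
  }
  return 0;
}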

View File

@@ -13,46 +13,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_NNACL_FP16_EXP_H_
#define MINDSPORE_NNACL_FP16_EXP_H_
#ifndef MINDSPORE_NNACL_FP16_EXP_FP16_H_
#define MINDSPORE_NNACL_FP16_EXP_FP16_H_
#include "nnacl/op_base.h"
#include "nnacl/exp_parameter.h"
#include "nnacl/fp32/exp_fp32.h"
#include "nnacl/intrinsics/ms_simd_instructions_fp16.h"
#ifdef __cplusplus
extern "C" {
#endif
void ExpFp16(const float16_t *src, float16_t *dst, int num);
#if defined(ENABLE_NEON)
static inline float32x4_t exp_fp32(float32x4_t input) {
static float32x4_t param[] = {{0.693147f, 0.693147f, 0.693147f, 0.693147f},
{1.0f / 120, 1.0f / 120, 1.0f / 120, 1.0f / 120},
{1.0f / 24, 1.0f / 24, 1.0f / 24, 1.0f / 24},
{1.0f / 6, 1.0f / 6, 1.0f / 6, 1.0f / 6},
{0.5f, 0.5f, 0.5f, 0.5f},
{1.0f, 1.0f, 1.0f, 1.0f}};
int32x4_t integer = vcvtq_s32_f32(input / param[0]);
float32x4_t decimal = input - vcvtq_f32_s32(integer) * param[0];
int32x4_t int_exp = vshlq_s32((integer + vmovq_n_s32(127)), vmovq_n_s32(23));
float32x4_t decimal_exp =
param[5] +
decimal * (param[5] + decimal * (param[4] + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
decimal_exp = decimal_exp * vld1q_f32((float *)(&int_exp));
return decimal_exp;
}
static inline void simd_exp_fp16(float16x8_t input, float16_t *dst) {
static float16x8_t maxv = {88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f, 88.0f};
static float16x8_t minv = {-88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f, -88.0f};
input = vmaxq_f16(minv, vminq_f16(input, maxv));
float32x4_t input_low = vcvt_f32_f16(vget_low_f16(input));
float32x4_t input_high = vcvt_f32_f16(vget_high_f16(input));
vst1q_f16(dst, vcombine_f16(vcvt_f16_f32(exp_fp32(input_low)), vcvt_f16_f32(exp_fp32(input_high))));
}
#endif
int ExpFusionFp16(const float16_t *src, float16_t *dst, const ExpParameter *param, int task_id);
static inline void single_exp_fp16(float16_t src, float16_t *dst) {
static float param[] = {0.693147f, 1.0f / 120, 1.0f / 24, 1.0f / 6, 1.0f / 2, 1.0f};
@@ -64,8 +37,9 @@ static inline void single_exp_fp16(float16_t src, float16_t *dst) {
1.0f + decimal * (1.0f + decimal * (0.5f + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
*dst = (float16_t)(*((float *)&int_exp) * decimal_exp);
}
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_NNACL_FP16_EXP_H_
#endif // MINDSPORE_NNACL_FP16_EXP_FP16_H_
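
The scalar single_exp_fp16 above and the SIMD kernels implement one shared approximation; as a sketch of the math behind the constants (0.693147 ≈ ln 2, the reciprocal-factorial coefficients, the (n + 127) << 23 exponent trick, and the ±88 clamp):

$$x=n\ln 2+r,\quad n=\mathrm{trunc}(x/\ln 2),\qquad e^{x}=2^{n}e^{r}\approx 2^{n}\Big(1+r+\tfrac{r^{2}}{2!}+\tfrac{r^{3}}{3!}+\tfrac{r^{4}}{4!}+\tfrac{r^{5}}{5!}\Big)$$

Here 2^n is materialized directly as IEEE-754 bits by writing (n + 127) << 23 into the exponent field, and inputs are clamped to [-88, 88] so the result stays finite in fp32.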

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,27 +19,6 @@
#include <string.h>
#include "nnacl/errorcode.h"
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) {
if (parameter->thread_num_ == 0) {
return NNACL_PARAM_INVALID;
}
if (parameter->scale_ == 1) {
for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
output_data[i] = expf(input_data[i]);
}
} else {
for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
output_data[i] = expf(input_data[i] * parameter->in_scale_);
}
}
if (parameter->out_scale_ != 1) {
for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
output_data[i] = output_data[i] * parameter->out_scale_;
}
}
return NNACL_OK;
}
void ExpFp32(const float *src, float *dst, int num) {
int i = 0;
#ifdef ENABLE_ARM64
@@ -52,3 +31,42 @@ void ExpFp32(const float *src, float *dst, int num) {
single_exp(src[i], dst + i);
}
}
int ExpFusionFp32(const float *src, float *dst, const ExpParameter *param, int task_id) {
if (param->op_parameter_.thread_num_ == 0) {
return NNACL_PARAM_INVALID;
}
int stride = UP_DIV(param->element_num_, param->op_parameter_.thread_num_);
int start = stride * task_id;
int end = MSMIN(param->element_num_, start + stride);
int num = end - start;
if (param->scale_ == 1) {
ExpFp32(src + start, dst + start, num);
} else {
int i = 0;
#ifdef ENABLE_ARM64
MS_FLOAT32X4 scale = MS_MOVQ_F32(param->in_scale_);
int count = (num / C4NUM) * C4NUM;
for (; i < count; i += C4NUM) {
simd_exp(MS_MULQ_F32(MS_LDQ_F32(src + start + i), scale), dst + start + i);
}
#endif
for (; i < num; ++i) {
single_exp(src[start + i] * param->in_scale_, dst + start + i);
}
}
if (param->out_scale_ != 1) {
// dst already holds the exp result, so only the output scaling remains.
int i = 0;
#ifdef ENABLE_ARM64
MS_FLOAT32X4 scale = MS_MOVQ_F32(param->out_scale_);
int count = (num / C4NUM) * C4NUM;
for (; i < count; i += C4NUM) {
MS_STQ_F32(dst + start + i, MS_MULQ_F32(MS_LDQ_F32(dst + start + i), scale));
}
#endif
for (; i < num; ++i) {
dst[start + i] *= param->out_scale_;
}
}
return NNACL_OK;
}
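
A hedged usage sketch (not part of this commit) of the fp32 entry point for a plain single-threaded exp; the field names match the ExpParameter struct introduced above:

#include "nnacl/fp32/exp_fp32.h"

/* Plain exp over n elements on one thread: scale_ == 1 routes through
   ExpFp32, and out_scale_ == 1 skips the output-scaling pass. */
void exp_once(const float *in, float *out, int n) {
  ExpParameter param = {0};
  param.op_parameter_.thread_num_ = 1;
  param.element_num_ = n;
  param.scale_ = 1.0f;
  param.in_scale_ = 1.0f;
  param.out_scale_ = 1.0f;
  ExpFusionFp32(in, out, &param, 0 /* task_id */);
}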

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,48 +18,40 @@
#define MINDSPORE_NNACL_FP32_EXP_H_
#include "nnacl/op_base.h"
#include "nnacl/exp_parameter.h"
#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
#include "nnacl/intrinsics/ms_simd_instructions.h"
#endif
typedef struct ExpParameter {
// Primitive parameter
OpParameter op_parameter_;
float base_;
float scale_;
float shift_;
// other parameter
int thread_num_;
float in_scale_;
float out_scale_;
int element_num_;
} ExpParameter;
#ifdef __cplusplus
extern "C" {
#endif
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id);
void ExpFp32(const float *src, float *dst, int num);
int ExpFusionFp32(const float *src, float *dst, const ExpParameter *param, int task_id);
#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
static inline void simd_exp(MS_FLOAT32X4 input, float *dst) {
static MS_FLOAT32X4 maxv = {88.0f, 88.0f, 88.0f, 88.0f};
static MS_FLOAT32X4 minv = {-88.0f, -88.0f, -88.0f, -88.0f};
static inline MS_FLOAT32X4 VexpFp32(MS_FLOAT32X4 input) {
static MS_FLOAT32X4 param[] = {{0.693147f, 0.693147f, 0.693147f, 0.693147f},
{1.0f / 120, 1.0f / 120, 1.0f / 120, 1.0f / 120},
{1.0f / 24, 1.0f / 24, 1.0f / 24, 1.0f / 24},
{1.0f / 6, 1.0f / 6, 1.0f / 6, 1.0f / 6},
{0.5f, 0.5f, 0.5f, 0.5f},
{1.0f, 1.0f, 1.0f, 1.0f}};
input = MS_MAXQ_F32(minv, MS_MINQ_F32(input, maxv));
MS_INT32X4 integer = MS_CVTQPS_EPI32(MS_DIVQ_F32(input, param[0]));
MS_FLOAT32X4 decimal = MS_SUBQ_F32(input, MS_MULQ_F32(MS_CVTQEPI32_PS(integer), param[0]));
MS_INT32X4 int_exp = MS_SLLIQ_EPI32(MS_ADDQ_EPI32(integer, MS_MOVQ_EPI32(127)), 23);
MS_FLOAT32X4 tmp = MS_MULQ_F32(decimal, (MS_ADDQ_F32(param[2], MS_MULQ_F32(decimal, param[1]))));
tmp = MS_MULQ_F32(decimal, MS_ADDQ_F32(param[4], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[3], tmp))));
MS_FLOAT32X4 decimal_exp = MS_ADDQ_F32(param[5], MS_MULQ_F32(decimal, MS_ADDQ_F32(param[5], tmp)));
MS_STQ_F32(dst, MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp)));
return MS_MULQ_F32(decimal_exp, MS_CAST_F32_S32(int_exp));
}
static inline void simd_exp(MS_FLOAT32X4 input, float *dst) {
static MS_FLOAT32X4 maxv = {88.0f, 88.0f, 88.0f, 88.0f};
static MS_FLOAT32X4 minv = {-88.0f, -88.0f, -88.0f, -88.0f};
input = MS_MAXQ_F32(minv, MS_MINQ_F32(input, maxv));
MS_STQ_F32(dst, VexpFp32(input));
}
#endif
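
Since VexpFp32 is now the shared core for both the fp32 and fp16 paths (the fp16 kernel converts each half-register through float32x4), a scalar mirror is handy for spot-checking the intrinsics. A sketch, not part of the commit; compare against expf from math.h:

#include <math.h>
#include <stdint.h>
#include <string.h>

static float vexp_scalar(float x) {
  x = fmaxf(-88.0f, fminf(x, 88.0f));    /* clamp, as simd_exp does        */
  int n = (int)(x / 0.693147f);          /* truncation, as in the vectors  */
  float r = x - (float)n * 0.693147f;    /* r = x - n*ln2                  */
  int32_t bits = (n + 127) << 23;        /* 2^n via the exponent field     */
  float pow2n;
  memcpy(&pow2n, &bits, sizeof(pow2n));  /* safer than the pointer cast    */
  float p = 1.0f + r * (1.0f + r * (0.5f + r * (1.0f / 6 + r * (1.0f / 24 + r * (1.0f / 120)))));
  return pow2n * p;                      /* e^x = 2^n * e^r                */
}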

View File

@@ -24,7 +24,6 @@ using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::lite::micro::nnacl {
int ExpFP32Coder::Prepare(CoderContext *context) {
exp_parameter_ = reinterpret_cast<ExpParameter *>(parameter_);
exp_parameter_->thread_num_ = exp_parameter_->op_parameter_.thread_num_;
float log_ = (exp_parameter_->base_ == -1) ? 1 : logf(exp_parameter_->base_);
exp_parameter_->in_scale_ = exp_parameter_->scale_ * log_;
if (exp_parameter_->shift_ == 0) {

View File

@@ -129,8 +129,8 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const SpliceParameter
void NNaclFp32Serializer::CodeStruct(const std::string &name, const ExpParameter &exp_parameter) {
CodeBaseStruct("ExpParameter", name, exp_parameter.op_parameter_, exp_parameter.base_, exp_parameter.scale_,
exp_parameter.shift_, exp_parameter.thread_num_, exp_parameter.in_scale_, exp_parameter.out_scale_,
exp_parameter.element_num_);
exp_parameter.shift_, exp_parameter.op_parameter_.thread_num_, exp_parameter.in_scale_,
exp_parameter.out_scale_, exp_parameter.element_num_);
}
void NNaclFp32Serializer::CodeStruct(const std::string &name, const StridedSliceParameter &strided_slice_parameter) {

View File

@@ -0,0 +1,34 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp16/exp_fp16.h"
#include "nnacl/fp16/exp_fp16.h"
#include "include/errorcode.h"
#include "src/kernel_registry.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::kernel {
int ExpFp16CPUKernel::DoExcute(int task_id) {
ExpFusionFp16(reinterpret_cast<float16_t *>(input_addr_), reinterpret_cast<float16_t *>(output_addr_), param_,
task_id);
return RET_OK;
}
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ExpFusion, LiteKernelCreator<ExpFp16CPUKernel>)
} // namespace mindspore::kernel

View File

@@ -0,0 +1,34 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_
#include <vector>
#include "src/runtime/kernel/arm/fp32/exp_fp32.h"
namespace mindspore::kernel {
class ExpFp16CPUKernel : public ExpCPUKernel {
public:
explicit ExpFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: ExpCPUKernel(parameter, inputs, outputs, ctx) {}
~ExpFp16CPUKernel() override{};
int DoExcute(int task_id) override;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_EXP_FP16_H_

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/exp_fp32.h"
#include <cmath>
#include "include/errorcode.h"
@@ -26,34 +25,31 @@ using mindspore::schema::PrimitiveType_ExpFusion;
namespace mindspore::kernel {
int ExpCPUKernel::Init() {
exp_parameter_ = reinterpret_cast<ExpParameter *>(op_parameter_);
exp_parameter_->thread_num_ = thread_count_;
float log_base = (param_->base_ == -1) ? 1 : logf(param_->base_);
param_->in_scale_ = param_->scale_ * log_base;
if (param_->shift_ == 0) {
param_->out_scale_ = 1;
} else {
if (log_base == 1) {
param_->out_scale_ = expf(param_->shift_);
} else {
param_->out_scale_ = powf(param_->base_, param_->shift_);
}
}
param_->op_parameter_.thread_num_ = ms_context_->thread_num_;
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}
int ExpCPUKernel::ReSize() {
exp_parameter_->thread_num_ = thread_count_;
float log_ = (exp_parameter_->base_ == -1) ? 1 : logf(exp_parameter_->base_);
exp_parameter_->in_scale_ = exp_parameter_->scale_ * log_;
if (exp_parameter_->shift_ == 0) {
exp_parameter_->out_scale_ = 1;
} else {
if (log_ == 1) {
exp_parameter_->out_scale_ = expf(exp_parameter_->shift_);
} else {
exp_parameter_->out_scale_ = powf(exp_parameter_->base_, exp_parameter_->shift_);
}
}
param_->element_num_ = in_tensors_.front()->ElementsNum();
return RET_OK;
}
int ExpCPUKernel::DoExcute(int task_id) {
Exp(input_addr_, output_addr_, exp_parameter_, task_id);
ExpFusionFp32(reinterpret_cast<float *>(input_addr_), reinterpret_cast<float *>(output_addr_), param_, task_id);
return RET_OK;
}
@@ -68,11 +64,10 @@ int ExpRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int ExpCPUKernel::Run() {
input_addr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();
input_addr_ = in_tensors_.front()->MutableData();
output_addr_ = out_tensors_.front()->MutableData();
auto ret = ParallelLaunch(this->ms_context_, ExpRun, this, exp_parameter_->thread_num_);
auto ret = ParallelLaunch(this->ms_context_, ExpRun, this, ms_context_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
return RET_ERROR;

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXP_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXP_H_
@@ -26,22 +25,20 @@ class ExpCPUKernel : public InnerKernel {
public:
explicit ExpCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {}
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<ExpParameter *>(parameter);
}
~ExpCPUKernel() override{};
int Init() override;
int ReSize() override;
int Run() override;
int DoExcute(int task_id);
virtual int DoExcute(int task_id);
protected:
const lite::InnerContext *ctx_ = nullptr;
int thread_count_ = 1;
ExpParameter *exp_parameter_ = nullptr;
private:
float *input_addr_ = nullptr;
float *output_addr_ = nullptr;
ExpParameter *param_ = nullptr;
void *input_addr_ = nullptr;
void *output_addr_ = nullptr;
};
} // namespace mindspore::kernel