!11407 [MSLITE][Develop] rewrite layer_norm and instance_norm converter and runtime
From: @yangruoqi713 Reviewed-by: Signed-off-by:
commit 0836a0a72a
@@ -0,0 +1,84 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/instance_norm_fp32.h"
#include <math.h>
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"

int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data,
const InstanceNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL) {
return NNACL_NULL_PTR;
}
int channel_step = UP_DIV(param->channel_, param->op_parameter_.thread_num_);
int channel_begin = task_id * channel_step;
int channel_end = MSMIN(channel_begin + channel_step, param->channel_);

for (int b = 0; b < param->batch_; b++) {
const float *src_b = src_data + b * param->channel_ * param->inner_size_;
float *dst_b = dst_data + b * param->channel_ * param->inner_size_;
for (int c = channel_begin; c < channel_end; c++) {
const float *src = src_b + c * param->inner_size_;
float *dst = dst_b + c * param->inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;

int index = 0;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t squarev = vmulq_f32(srcv, srcv);
sum = vaddq_f32(sum, srcv);
square_sum = vaddq_f32(square_sum, squarev);
}
mean = sum[0] + sum[1] + sum[2] + sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < param->inner_size_; index++) {
mean += src[index];
square_mean += src[index] * src[index];
}

mean /= (float)param->inner_size_;
square_mean /= (float)param->inner_size_;
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);

index = 0;
#ifdef ENABLE_NEON
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t outv = vsubq_f32(srcv, meanv);
outv = vmulq_f32(outv, denov);

float32x4_t gammav = vdupq_n_f32(gamma_data[c]);
float32x4_t betav = vdupq_n_f32(beta_data[c]);
outv = vmulq_f32(outv, gammav);
outv = vaddq_f32(outv, betav);
vst1q_f32(dst + index, outv);
}
#endif
for (; index < param->inner_size_; index++) {
dst[index] = (src[index] - mean) * deno;
dst[index] = dst[index] * gamma_data[c] + beta_data[c];
}
}
}
return NNACL_OK;
}
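For context (not part of this commit), a minimal single-threaded driver for the new kernel could look like the sketch below. The InstanceNormParameter fields are the ones added in nnacl/instance_norm_parameter.h later in this diff; the helper name RunInstanceNormOnce and the NCHW shape assumption are illustrative only.

#include "nnacl/fp32/instance_norm_fp32.h"

/* Sketch: normalize one NCHW tensor of shape [batch, channel, h, w] on a single
   thread (task_id 0, thread_num_ 1); gamma and beta hold one value per channel. */
int RunInstanceNormOnce(const float *src, float *dst, const float *gamma, const float *beta,
                        int batch, int channel, int h, int w, float epsilon) {
  InstanceNormParameter param = {0};
  param.op_parameter_.thread_num_ = 1; /* every channel is handled by task 0 */
  param.batch_ = batch;
  param.channel_ = channel;
  param.inner_size_ = h * w; /* elements per (batch, channel) plane */
  param.epsilon_ = epsilon;
  return InstanceNorm(src, dst, gamma, beta, &param, 0);
}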
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

@@ -13,14 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_
#define MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_
#ifndef MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
#define MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_

#include "src/ops/arithmetic.h"
#include "nnacl/op_base.h"
#include "nnacl/instance_norm_parameter.h"

namespace mindspore {
namespace lite {
OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_
#ifdef __cplusplus
extern "C" {
#endif

int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data,
const InstanceNormParameter *param, size_t task_id);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_

@@ -18,100 +18,81 @@
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"

int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data,
const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data,
size_t task_id, size_t thread_num) {
if (src_data == NULL || dst_data == NULL) {
return NNACL_NULL_PTR;
void LayerNormMeanAndSquare(const float *src, int num, float *mean, float *square_mean) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < num - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t squarev = vmulq_f32(srcv, srcv);
sum = vaddq_f32(sum, srcv);
square_sum = vaddq_f32(square_sum, squarev);
}
if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) {
*mean = sum[0] + sum[1] + sum[2] + sum[3];
*square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < num; index++) {
*mean += src[index];
*square_mean += src[index] * src[index];
}

*mean /= (float)num;
*square_mean /= (float)num;
}

void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data, const float *beta_data, int num,
const float mean, const float deno) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
for (; index < num - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t outv = vsubq_f32(srcv, meanv);
outv = vmulq_f32(outv, denov);
float32x4_t gammav = vld1q_f32(gamma_data + index);
float32x4_t betav = vld1q_f32(beta_data + index);
outv = vmulq_f32(outv, gammav);
outv = vaddq_f32(outv, betav);
vst1q_f32(dst + index, outv);
}
#endif
for (; index < num; index++) {
dst[index] = (src[index] - mean) * (deno);
dst[index] = dst[index] * gamma_data[index] + beta_data[index];
}
}

int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data,
LayerNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR;
}

for (size_t j = task_id; j < outer_size; j += thread_num) {
const float *src = src_data + j * inner_size;
float *dst = dst_data + j * inner_size;
int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_);
int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_);
for (int i = task_id * step; i < thread_end; i++) {
const float *src_norm = src_data + i * param->norm_inner_size_;
float *dst_norm = dst_data + i * param->norm_inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
LayerNormMeanAndSquare(src_norm, param->norm_inner_size_, &mean, &square_mean);
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);

int index = 0;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t squarev1 = vmulq_f32(srcv1, srcv1);
float32x4_t squarev2 = vmulq_f32(srcv2, srcv2);
sum = vaddq_f32(sum, srcv1);
sum = vaddq_f32(sum, srcv2);
square_sum = vaddq_f32(square_sum, squarev1);
square_sum = vaddq_f32(square_sum, squarev2);
}
mean = sum[0] + sum[1] + sum[2] + sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < inner_size; index++) {
mean += src[index];
square_mean += src[index] * src[index];
}

mean /= (float)inner_size;
square_mean /= (float)inner_size;
const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);

index = 0;
#ifdef ENABLE_NEON
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
if (elementwise_mode != 0) {
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t outv1 = vsubq_f32(srcv1, meanv);
float32x4_t outv2 = vsubq_f32(srcv2, meanv);
outv1 = vmulq_f32(outv1, denov);
outv2 = vmulq_f32(outv2, denov);
if (elementwise_mode == 1) {
float32x4_t gammav1 = vdupq_n_f32(gamma_data[j]);
float32x4_t betav1 = vdupq_n_f32(beta_data[j]);
outv1 = vmulq_f32(outv1, gammav1);
outv2 = vmulq_f32(outv2, gammav1);
outv1 = vaddq_f32(outv1, betav1);
outv2 = vaddq_f32(outv2, betav1);
} else {
float32x4_t gammav1 = vld1q_f32(gamma_data + index);
float32x4_t gammav2 = vld1q_f32(gamma_data + index + 4);
float32x4_t betav1 = vld1q_f32(beta_data + index);
float32x4_t betav2 = vld1q_f32(beta_data + index + 4);
outv1 = vmulq_f32(outv1, gammav1);
outv2 = vmulq_f32(outv2, gammav2);
outv1 = vaddq_f32(outv1, betav1);
outv2 = vaddq_f32(outv2, betav2);
}
vst1q_f32(dst + index, outv1);
vst1q_f32(dst + index + 4, outv2);
if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const float *src_param = src_norm + x * param->params_inner_size_;
float *dst_param = dst_norm + x * param->params_inner_size_;
LayerNormGammaAndBeta(dst_param, src_param, gamma_data, beta_data, param->params_inner_size_, mean, deno);
}
} else {
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t outv1 = vsubq_f32(srcv1, meanv);
float32x4_t outv2 = vsubq_f32(srcv2, meanv);
outv1 = vmulq_f32(outv1, denov);
outv2 = vmulq_f32(outv2, denov);
vst1q_f32(dst + index, outv1);
vst1q_f32(dst + index + 4, outv2);
}
}
#endif
for (; index < inner_size; index++) {
dst[index] = (src[index] - mean) * deno;
if (elementwise_mode == 1) {
dst[index] = dst[index] * gamma_data[j] + beta_data[j];
} else if (elementwise_mode == 2) {
dst[index] = dst[index] * gamma_data[index] + beta_data[index];
}
int x = i / param->norm_outer_size_;
const float *src_param = src_norm + x * param->params_inner_size_;
float *dst_param = dst_norm + x * param->params_inner_size_;
const float *gamma = gamma_data + x * param->params_inner_size_;
const float *beta = beta_data + x * param->params_inner_size_;
LayerNormGammaAndBeta(dst_param, src_param, gamma, beta, param->norm_inner_size_, mean, deno);
}
}
return NNACL_OK;
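For context (not part of this commit), the rewritten LayerNorm is driven by the norm/params size fields instead of a normalized-shape vector. A minimal single-threaded sketch, with the fields filled the same way LayerNormCPUKernel::ReSize() derives them later in this diff, is shown below; the (2, 4, 16) shape, the epsilon value, the helper name, and the nnacl/fp32/layer_norm_fp32.h include path are assumptions for illustration.

#include "nnacl/fp32/layer_norm_fp32.h"

/* Sketch: layer-normalize a (2, 4, 16) tensor over its last axis with one thread;
   gamma and beta each hold 16 values. */
int LayerNormLastAxisOnce(const float *src, const float *gamma, const float *beta, float *dst) {
  LayerNormParameter param = {0};
  param.op_parameter_.thread_num_ = 1;
  param.epsilon_ = 1e-5f;
  param.begin_norm_axis_ = 2;       /* normalize over the last dimension */
  param.begin_params_axis_ = 2;     /* gamma/beta cover the last dimension only */
  param.norm_outer_size_ = 2 * 4;   /* product of dims before begin_norm_axis_ */
  param.norm_inner_size_ = 16;      /* product of dims from begin_norm_axis_ on */
  param.params_outer_size_ = 2 * 4; /* product of dims before begin_params_axis_ */
  param.params_inner_size_ = 16;    /* product of dims from begin_params_axis_ on */
  return LayerNorm(src, gamma, beta, dst, &param, 0);
}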
@@ -23,9 +23,8 @@
extern "C" {
#endif

int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data,
const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data,
size_t task_id, size_t thread_num);
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data,
LayerNormParameter *param, size_t task_id);
#ifdef __cplusplus
}
#endif

@@ -23,9 +23,10 @@ typedef struct InstanceNormParameter {
// Primitive parameter
OpParameter op_parameter_;
float epsilon_;
float momentum_;
// shape correlative
int batch_;
int channel_;
int inner_size_;
} InstanceNormParameter;

#endif // MINDSPORE_LITE_NNACL_INSTANCE_NORM_PARAMETER_H_

@@ -16,45 +16,58 @@

#include "nnacl/int8/layer_norm_int8.h"

void LayerNormGammaAndBetaInt8(int8_t *dst, const int8_t *src, const float *gamma_data, const float *beta_data,
LayerNormQuantArg *quant, int num, const float mean, const float deno) {
for (int i = 0; i < num; i++) {
float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_;
float fp32_dst = (fp32_src - mean) * deno;
fp32_dst = fp32_dst * gamma_data[i] + beta_data[i];
int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_);
dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128);
}
}

/*
* origin : (x-mean) / sqrt(variance + epsilon) * gamma + beta
* quant : (x-mean) / sqrt(sum(x * x) - mean * mean) * gamma + beta
*
* */
int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data,
enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant,
float epsilon) {
if (src_data == NULL || dst_data == NULL) {
LayerNormParameter *param, LayerNormQuantArg *quant, int task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR;
}

if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) {
return NNACL_NULL_PTR;
}

for (int out_index = 0; out_index < outer_size; out_index++) {
const int8_t *src = src_data + out_index * inner_size;
int8_t *dst = dst_data + out_index * inner_size;
int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_);
int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_);
for (int i = task_id * step; i < thread_end; i++) {
const int8_t *src_norm = src_data + i * param->norm_inner_size_;
int8_t *dst_norm = dst_data + i * param->norm_inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
for (int i = 0; i < inner_size; i++) {
float float_src = (src[i] - quant->in_zp_) * quant->in_scale_;
for (int j = 0; j < param->norm_inner_size_; j++) {
float float_src = (src_norm[j] - quant->in_zp_) * quant->in_scale_;
mean += float_src;
square_mean += float_src * float_src;
}
mean /= (float)inner_size;
square_mean /= (float)inner_size;
const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);
for (int i = 0; i < inner_size; i++) {
float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_;
float fp32_dst = (fp32_src - mean) * deno;
if (elementwise_mode == 1) {
fp32_dst = fp32_dst * gamma_data[out_index] + beta_data[out_index];
} else if (elementwise_mode == 2) {
fp32_dst = fp32_dst * gamma_data[i] + beta_data[i];
mean /= (float)param->norm_inner_size_;
square_mean /= (float)param->norm_inner_size_;
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);

if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const int8_t *src_param = src_norm + x * param->params_inner_size_;
int8_t *dst_param = dst_norm + x * param->params_inner_size_;
LayerNormGammaAndBetaInt8(dst_param, src_param, gamma_data, beta_data, quant, param->norm_inner_size_, mean,
deno);
}
int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_);
dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128);
} else {
int x = i / param->norm_outer_size_;
const int8_t *src_param = src_norm + x * param->params_inner_size_;
int8_t *dst_param = dst_norm + x * param->params_inner_size_;
const float *gamma = gamma_data + x * param->params_inner_size_;
const float *beta = beta_data + x * param->params_inner_size_;
LayerNormGammaAndBetaInt8(dst_param, src_param, gamma, beta, quant, param->norm_inner_size_, mean, deno);
}
}
return NNACL_OK;

@@ -25,8 +25,7 @@ extern "C" {
#endif

int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data,
enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant,
float epsilon);
LayerNormParameter *param, LayerNormQuantArg *quant, int task_id);

#ifdef __cplusplus
}

@@ -24,10 +24,15 @@ typedef struct LayerNormParameter {
// Primitive parameter
OpParameter op_parameter_;
float epsilon_;
enum ElementwiseMode elementwise_mode_;
int begin_norm_axis_;
int begin_params_axis_;
// shape correlative
int normalized_shape_[8];
int norm_inner_size_;
int norm_outer_size_;
int params_inner_size_;
int params_outer_size_;
int normalized_dims_;
int normalized_shape_[8];
// other parameter
int thread_count_;
int thread_outsize_;
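As a worked example of the new size fields (illustrative, not taken from the diff): for an input of shape (2, 6, 8) with begin_norm_axis_ = 1 and begin_params_axis_ = 2, the derivation used by LayerNormCPUKernel::ReSize() later in this commit gives norm_outer_size_ = 2, norm_inner_size_ = 48, params_outer_size_ = 12 and params_inner_size_ = 8; since norm_outer_size_ <= params_outer_size_, each normalized row of 48 values is processed as 48 / 8 = 6 slices, and the same gamma and beta of length 8 are applied to every slice.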
@@ -17,7 +17,7 @@
namespace mindspore.schema;

enum ResizeMethod: byte {
UNKNOW = -1,
UNKNOWN = -1,
LINEAR = 0,
NEAREST = 1,
CUBIC = 2

@@ -80,7 +80,7 @@ enum ActivationType : byte {
HARD_TANH = 16,
SIGN = 17,
SWISH = 18,
UNKNOW = 19
UNKNOWN = 19
}
enum ActivationGradType : byte {
NO_ACTIVATION = 0,

@@ -99,7 +99,7 @@ enum ActivationGradType : byte {
HSIGMOID = 13,
THRESHOLDRELU = 14,
LINEAR = 15,
UNKNOW = 16
UNKNOWN = 16
}
enum ReduceType : byte {
REDUCE_MAX = 0,

@@ -109,7 +109,7 @@ enum ReduceType : byte {
REDUCE_LOG_SUM_EXP = 4,
REDUCE_PROD = 5,
REDUCE_SUM = 6,
UNKNOW = 7
UNKNOWN = 7
}

enum PoolMode : byte {

@@ -121,7 +121,7 @@ enum EltwiseMode : byte {
PROD = 0,
SUM = 1,
MAXIMUM = 2,
UNKNOW = 3
UNKNOWN = 3
}

enum PadMode : byte {

@@ -1144,9 +1144,9 @@ table Identity {
}

table LayerNorm {
normalizedShape : [int];
begin_norm_axis : int;
begin_params_axis : int;
epsilon : float = 0.00001;
elementwiseAffine : bool;
}

table While {

@@ -22,19 +22,14 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
std::vector<int> LayerNorm::GetNormalizedShape() const {
return this->primitive_->value.AsLayerNorm()->normalizedShape;
}
float LayerNorm::GetEpsilon() const { return this->primitive_->value.AsLayerNorm()->epsilon; }
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value.AsLayerNorm()->elementwiseAffine; }
int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value.AsLayerNorm()->begin_norm_axis; }
int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value.AsLayerNorm()->begin_params_axis; }

void LayerNorm::SetNormalizedShape(const std::vector<int> &normalizedShape) {
this->primitive_->value.AsLayerNorm()->normalizedShape = normalizedShape;
}
void LayerNorm::SetEpsilon(float epsilon) { this->primitive_->value.AsLayerNorm()->epsilon = epsilon; }
void LayerNorm::SetElementwiseAffine(bool elementwiseAffine) {
this->primitive_->value.AsLayerNorm()->elementwiseAffine = elementwiseAffine;
}
void LayerNorm::SetBeginNormAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_norm_axis = axis; }
void LayerNorm::SetBeginParamsAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_params_axis = axis; }

int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;

@@ -60,12 +55,17 @@ int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &
} else {
layer_norm_attr->epsilon = 1e-7;
}
value_attr = prim.GetAttr("normalized_shape");
if (value_attr != nullptr) {
layer_norm_attr->normalizedShape = CastToInt(value_attr);
auto norm_axis_attr = prim.GetAttr("begin_norm_axis");
if (norm_axis_attr != nullptr) {
layer_norm_attr->begin_norm_axis = GetValue<float>(norm_axis_attr);
} else {
layer_norm_attr->begin_norm_axis = -1;
}
if (inputs.size() == 3) {
layer_norm_attr->elementwiseAffine = true;
auto params_axis_attr = prim.GetAttr("begin_params_axis");
if (params_axis_attr != nullptr) {
layer_norm_attr->begin_params_axis = GetValue<float>(params_axis_attr);
} else {
layer_norm_attr->begin_params_axis = -1;
}
this->primitive_->value.value = layer_norm_attr;
}

@@ -81,28 +81,20 @@ int LayerNorm::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffe
return RET_ERROR;
}

std::vector<int32_t> normalizedShape;
if (attr->normalizedShape() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->normalizedShape()->size()); i++) {
normalizedShape.push_back(attr->normalizedShape()->data()[i]);
}
}
auto val_offset = schema::CreateLayerNormDirect(*fbb, &normalizedShape, attr->epsilon(), attr->elementwiseAffine());
auto val_offset = schema::CreateLayerNorm(*fbb, attr->epsilon(), attr->begin_norm_axis(), attr->begin_params_axis());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_LayerNorm, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
std::vector<int> LayerNorm::GetNormalizedShape() const {
auto fb_vector = this->primitive_->value_as_LayerNorm()->normalizedShape();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}

float LayerNorm::GetEpsilon() const { return this->primitive_->value_as_LayerNorm()->epsilon(); }
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value_as_LayerNorm()->elementwiseAffine(); }
int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value_as_LayerNorm()->begin_norm_axis(); }
int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value_as_LayerNorm()->begin_params_axis(); }

PrimitiveC *LayerNormCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<LayerNorm>(primitive);
}
Registry LayerNormRegistry(schema::PrimitiveType_LayerNorm, LayerNormCreator);

#endif
int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {
if (outputs_.size() != kSingleNum || (inputs_.size() != kSingleNum && inputs_.size() != kTripleNum)) {

@@ -116,41 +108,13 @@ int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite:
output->set_format(input->format());
output->set_data_type(input->data_type());

if (GetElementwiseAffine() && inputs_.size() != kTripleNum) {
MS_LOG(INFO) << "input tensor amount error";
return RET_INPUT_TENSOR_ERROR;
}
if (!GetElementwiseAffine() && inputs_.size() != kSingleNum) {
MS_LOG(INFO) << "input tensor amount error";
return RET_INPUT_TENSOR_ERROR;
}
if (!infer_flag()) {
return RET_INFER_INVALID;
}
auto input_shape = input->shape();
normlized_shape_ = GetNormalizedShape();
elementwise_mode_ = GetElementwiseAffine() ? 2 : 0;
if (normlized_shape_.size() > input_shape.size()) {
MS_LOG(INFO) << "normalized_shape attr invalid";
return RET_PARAM_INVALID;
for (size_t i = GetBeginNormAxis(); i < input_shape.size(); i++) {
normlized_shape_.push_back(input_shape[i]);
}
if (normlized_shape_.empty()) {
// instance norm -> layernorm only for nchw
if (input->format() == schema::Format_NCHW) {
normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 2, input_shape.end());
elementwise_mode_ = 1;
} else {
normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 1, input_shape.end());
}
}
size_t first_index = input_shape.size() - normlized_shape_.size();
for (size_t i = first_index; i < input_shape.size(); ++i) {
if (input_shape.at(i) != normlized_shape_.at(i - first_index)) {
MS_LOG(INFO) << "normalized_shape attr invalid";
return RET_PARAM_INVALID;
}
}

output->set_shape(input_shape);
return RET_OK;
}

@@ -32,23 +32,21 @@ class LayerNorm : public PrimitiveC {
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(LayerNorm, PrimitiveC);
explicit LayerNorm(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
void SetNormalizedShape(const std::vector<int> &normalizedShape);
void SetEpsilon(float epsilon);
void SetElementwiseAffine(bool elementwiseAffine);
void SetBeginNormAxis(int axis);
void SetBeginParamsAxis(int axis);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
#else
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
std::vector<int> GetNormalizedShape() const;
float GetEpsilon() const;
bool GetElementwiseAffine() const;
std::vector<int> normlized_shape() const { return normlized_shape_; }
int elementwise_mode() const { return elementwise_mode_; }
int GetBeginNormAxis() const;
int GetBeginParamsAxis() const;
std::vector<int> GetNormlizedShape() const { return normlized_shape_; }

protected:
std::vector<int> normlized_shape_;
int elementwise_mode_ = 0;
};
} // namespace lite
} // namespace mindspore

@@ -14,9 +14,7 @@
* limitations under the License.
*/

#include "src/ops/populate/layer_norm_populate.h"
#include "nnacl/layer_norm_parameter.h"
#include <cstdint>
#include "src/ops/layer_norm.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"

@@ -32,20 +30,15 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi
memset(layer_norm_parameter, 0, sizeof(LayerNormParameter));
layer_norm_parameter->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::LayerNorm *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
auto normalized_shape = param->normlized_shape();
auto normalized_shape = param->GetNormlizedShape();
layer_norm_parameter->normalized_dims_ = normalized_shape.size();
if (normalized_shape.size() > SIZE_MAX / sizeof(int)) {
MS_LOG(ERROR) << "normalized_shape size too big";
free(layer_norm_parameter);
return nullptr;
}
MS_ASSERT(normalized_shape.size() < 8);
for (size_t i = 0; i < normalized_shape.size(); i++) {
layer_norm_parameter->normalized_shape_[i] = normalized_shape[i];
}
layer_norm_parameter->epsilon_ = param->GetEpsilon();
layer_norm_parameter->elementwise_mode_ = static_cast<ElementwiseMode>(param->elementwise_mode());

layer_norm_parameter->begin_norm_axis_ = param->GetBeginNormAxis();
layer_norm_parameter->begin_params_axis_ = param->GetBeginParamsAxis();
return reinterpret_cast<OpParameter *>(layer_norm_parameter);
}

@@ -0,0 +1,78 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/instance_norm_fp32.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp32/instance_norm_fp32.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_InstanceNorm;

namespace mindspore::kernel {
int InstanceNormCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}

int InstanceNormCPUKernel::ReSize() {
param_->op_parameter_.thread_num_ = context_->thread_num_;
auto shape = in_tensors_.front()->shape();
param_->batch_ = shape[0];
param_->inner_size_ = shape[2] * shape[3];
param_->channel_ = shape[1];
return RET_OK;
}

int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
int ret = InstanceNorm(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]";
return ret;
}
return RET_OK;
}

int InstanceNormRun(void *cdata, int task_id) {
auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
auto ret = kernel->DoInstanceNorm(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
}

int InstanceNormCPUKernel::Run() {
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
return ret;
}
return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, LiteKernelCreator<InstanceNormCPUKernel>)
} // namespace mindspore::kernel

@@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/instance_norm_parameter.h"

using mindspore::lite::InnerContext;

namespace mindspore::kernel {
class InstanceNormCPUKernel : public LiteKernel {
public:
InstanceNormCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
}
~InstanceNormCPUKernel() override{};

int Init() override;
int ReSize() override;
int Run() override;
int DoInstanceNorm(int task_id);

private:
InstanceNormParameter *param_ = nullptr;
float *src_data_ = nullptr;
float *dst_data_ = nullptr;
float *gamma_data_ = nullptr;
float *beta_data_ = nullptr;
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_

@@ -18,7 +18,6 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/ops/populate/layer_norm_populate.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;

@@ -35,29 +34,37 @@ int LayerNormCPUKernel::Init() {
}

int LayerNormCPUKernel::ReSize() {
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
}
op_parameter_ = PopulateLayerNormParameter(primitive_);
op_parameter_->thread_num_ = context_->thread_num_;
param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_);
param_->begin_norm_axis_ = -1;
param_->begin_params_axis_ = -1;

auto shape = in_tensors_.front()->shape();
outer_size_ = 1;
inner_size_ = 1;
for (size_t i = 0; i < shape.size(); ++i) {
if (i + param_->normalized_dims_ < shape.size()) {
outer_size_ *= shape.at(i);
} else {
inner_size_ *= shape.at(i);
}
param_->begin_norm_axis_ =
param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
param_->begin_params_axis_ =
param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();

param_->norm_outer_size_ = 1;
for (int i = 0; i < param_->begin_norm_axis_; ++i) {
param_->norm_outer_size_ *= shape.at(i);
}
param_->norm_inner_size_ = 1;
for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
param_->norm_inner_size_ *= shape.at(i);
}
param_->params_outer_size_ = 1;
for (int i = 0; i < param_->begin_params_axis_; ++i) {
param_->params_outer_size_ *= shape.at(i);
}
param_->params_inner_size_ = 1;
for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
param_->params_inner_size_ *= shape.at(i);
}
param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_);
return RET_OK;
}

int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
int ret = LayerNorm(outer_size_, inner_size_, src_data_, gamma_data_, beta_data_, param_->elementwise_mode_,
param_->epsilon_, dst_data_, thread_id, op_parameter_->thread_num_);
int ret = LayerNorm(src_data_, gamma_data_, beta_data_, dst_data_, param_, thread_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoLayerNorm error error_code[" << ret << "]";
return ret;

@@ -77,10 +84,8 @@ int LayerNormRun(void *cdata, int task_id) {

int LayerNormCPUKernel::Run() {
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
if (param_->elementwise_mode_ != 0) {
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
}
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {

@@ -40,8 +40,6 @@ class LayerNormCPUKernel : public LiteKernel {

private:
LayerNormParameter *param_ = nullptr;
int outer_size_;
int inner_size_;
float *src_data_ = nullptr;
float *dst_data_ = nullptr;
float *gamma_data_ = nullptr;

@@ -15,7 +15,6 @@
*/
#include "src/runtime/kernel/arm/int8/layer_norm_int8.h"
#include "src/runtime/runtime_api.h"
#include "src/ops/populate/layer_norm_populate.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;

@@ -25,15 +24,14 @@ using mindspore::schema::PrimitiveType_LayerNorm;

namespace mindspore::kernel {
LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() {
if (param_->elementwise_mode_ != 0 && gamma_ptr_ != nullptr) {
if (gamma_ptr_ != nullptr) {
free(gamma_ptr_);
gamma_ptr_ = nullptr;
}
if (param_->elementwise_mode_ != 0 && beta_ptr_ != nullptr) {
if (beta_ptr_ != nullptr) {
free(beta_ptr_);
beta_ptr_ = nullptr;
}
return;
}

int LayerNormInt8CPUKernel::SetQuantArgs() {

@@ -45,33 +43,31 @@ int LayerNormInt8CPUKernel::SetQuantArgs() {
quant_param_.out_zp_ = output->quant_params().front().zeroPoint;
quant_param_.out_scale_ = output->quant_params().front().scale;

if (param_->elementwise_mode_ != 0) {
lite::Tensor *gamma_tensor = in_tensors_.at(1);
lite::Tensor *beta_tensor = in_tensors_.at(2);
lite::Tensor *gamma_tensor = in_tensors_.at(1);
lite::Tensor *beta_tensor = in_tensors_.at(2);

double gamma_scale = gamma_tensor->quant_params().front().scale;
int gamma_zp = gamma_tensor->quant_params().front().zeroPoint;
gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float)));
if (gamma_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc gamma_ptr_ failed";
return RET_ERROR;
}
int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c());
for (int i = 0; i < gamma_tensor->ElementsNum(); i++) {
gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale;
}
double gamma_scale = gamma_tensor->quant_params().front().scale;
int gamma_zp = gamma_tensor->quant_params().front().zeroPoint;
gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float)));
if (gamma_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc gamma_ptr_ failed";
return RET_ERROR;
}
int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c());
for (int i = 0; i < gamma_tensor->ElementsNum(); i++) {
gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale;
}

beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float)));
if (beta_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc beta_ptr_ failed";
free(gamma_ptr_);
gamma_ptr_ = nullptr;
return RET_ERROR;
}
int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c());
for (int i = 0; i < beta_tensor->ElementsNum(); i++) {
beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale;
}
beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float)));
if (beta_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc beta_ptr_ failed";
free(gamma_ptr_);
gamma_ptr_ = nullptr;
return RET_ERROR;
}
int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c());
for (int i = 0; i < beta_tensor->ElementsNum(); i++) {
beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale;
}
return RET_OK;
}

@@ -86,30 +82,37 @@ int LayerNormInt8CPUKernel::Init() {
}

int LayerNormInt8CPUKernel::ReSize() {
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
}
op_parameter_ = PopulateLayerNormParameter(primitive_);
if (op_parameter_ == nullptr) {
MS_LOG(ERROR) << "op_parameter_ is nullptr!";
return RET_NULL_PTR;
}
op_parameter_->thread_num_ = context_->thread_num_;
param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_);
auto shape = in_tensors_.front()->shape();
outer_size_ = 1;
inner_size_ = 1;
for (size_t i = 0; i < shape.size(); ++i) {
if (i + param_->normalized_dims_ < shape.size()) {
outer_size_ *= shape.at(i);
} else {
inner_size_ *= shape.at(i);
}
}
param_->begin_norm_axis_ = -1;
param_->begin_params_axis_ = -1;

param_->thread_count_ = MSMIN(outer_size_, op_parameter_->thread_num_);
param_->thread_outsize_ = UP_DIV(outer_size_, param_->thread_count_);
auto shape = in_tensors_.front()->shape();
param_->begin_norm_axis_ =
param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
param_->begin_params_axis_ =
param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();

param_->norm_outer_size_ = 1;
for (int i = 0; i < param_->begin_norm_axis_; ++i) {
param_->norm_outer_size_ *= shape.at(i);
}
param_->norm_inner_size_ = 1;
for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
param_->norm_inner_size_ *= shape.at(i);
}
param_->params_outer_size_ = 1;
for (int i = 0; i < param_->begin_params_axis_; ++i) {
param_->params_outer_size_ *= shape.at(i);
}
param_->params_inner_size_ = 1;
for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
param_->params_inner_size_ *= shape.at(i);
}
param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_);
return RET_OK;
}

int LayerNormInt8CPUKernel::DoExecute(int task_id) {
LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, &quant_param_, task_id);
return RET_OK;
}

@@ -119,21 +122,6 @@ int LayerNormInt8Run(void *cdata, int task_id) {
return RET_OK;
}

int LayerNormInt8CPUKernel::DoExecute(int task_id) {
int current_out_size = outer_size_ - task_id * param_->thread_outsize_;
current_out_size = MSMIN(current_out_size, param_->thread_outsize_);
if (current_out_size <= 0) {
return RET_OK;
}

const int8_t *thread_src = src_ptr_ + task_id * param_->thread_outsize_ * inner_size_;
int8_t *thread_dst = dst_ptr_ + task_id * param_->thread_outsize_ * inner_size_;

LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_mode_, current_out_size, inner_size_,
&quant_param_, param_->epsilon_);
return RET_OK;
}

int LayerNormInt8CPUKernel::Run() {
src_ptr_ = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData());
dst_ptr_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());

@@ -46,8 +46,6 @@ class LayerNormInt8CPUKernel : public LiteKernel {
private:
LayerNormParameter *param_ = nullptr;
LayerNormQuantArg quant_param_;
int outer_size_ = 0;
int inner_size_ = 0;
int8_t *src_ptr_ = nullptr;
int8_t *dst_ptr_ = nullptr;
float *gamma_ptr_ = nullptr;

@@ -349,7 +349,7 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
}
break;
}
case schema::ResizeMethod_UNKNOW:
case schema::ResizeMethod_UNKNOWN:
default: {
MS_LOG(ERROR) << "Resize unknown method " << method_;
ret = RET_ERROR;

@@ -33,24 +33,6 @@ namespace mindspore::kernel {

int LayerNormOpenCLKernel::CheckSpecs() {
auto param = reinterpret_cast<LayerNormParameter *>(this->op_parameter_);
if (param->elementwise_mode_ == ELEMENTWISE_PER_NUM) {
if (in_tensors_.size() != 3) {
MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl;
return RET_ERROR;
}
if (param->normalized_dims_ > in_tensors_.at(0)->shape().size()) {
MS_LOG(ERROR) << " invalid normalized_shape_ size" << param->normalized_dims_ << std::endl;
return RET_ERROR;
}
} else if (param->elementwise_mode_ == ELEMENTWISE_NOT) {
if (in_tensors_.size() != 1) {
MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl;
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "Unsupported elementwise_mode_" << param->elementwise_mode_;
return RET_ERROR;
}
if (in_tensors_.at(0)->shape().size() != 4 || out_tensors_.size() != 1) {
MS_LOG(ERROR) << "UnSupported in_tensors_.shape.size: " << in_tensors_.at(0)->shape().size()
<< " out_tensors_.size(): " << out_tensors_.size();

@@ -24,7 +24,6 @@ namespace {
// PrimitiveType_Stack: src/ops/populate/stack_populate.cc
OpParameter *CreateParameter(float epsilon, int normalized_dims_, std::vector<int> normalizedShape) {
auto *param = test::CreateParameter<LayerNormParameter>(schema::PrimitiveType_LayerNorm);
param->elementwise_mode_ = ELEMENTWISE_PER_NUM;
param->epsilon_ = epsilon;
param->normalized_dims_ = normalized_dims_;
for (int i = 0; i < normalizedShape.size() && i < normalized_dims_; ++i) {

@@ -48,7 +48,6 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = {
schema::PrimitiveType_FusedBatchNorm,
schema::PrimitiveType_PReLU,
schema::PrimitiveType_BiasAdd,
schema::PrimitiveType_InstanceNorm,
schema::PrimitiveType_SpaceToDepth,
schema::PrimitiveType_DepthToSpace,
schema::PrimitiveType_TopK};

@@ -22,7 +22,7 @@ namespace lite {
lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphProto &onnx_graph,
const onnx::NodeProto &onnx_node) {
MS_LOG(DEBUG) << "onnx InstanceNormParser";
auto attr = std::make_unique<schema::LayerNormT>();
auto attr = std::make_unique<schema::InstanceNormT>();
if (attr == nullptr) {
MS_LOG(ERROR) << "new op failed";
return nullptr;

@@ -39,8 +39,7 @@ lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphPr
MS_LOG(ERROR) << "new primitive failed";
return nullptr;
}
attr->elementwiseAffine = true;
primitive->value.type = schema::PrimitiveType_LayerNorm;
primitive->value.type = schema::PrimitiveType_InstanceNorm;
primitive->value.value = attr.release();
return PrimitiveC::Create(primitive.release());
}

@@ -57,7 +57,7 @@ STATUS TFResizeParser::Parse(const tensorflow::NodeDef &tf_op,
} else if (tf_op.op() == "ResizeNearestNeighbor") {
attr->method = schema::ResizeMethod_NEAREST;
} else {
attr->method = schema::ResizeMethod_UNKNOW;
attr->method = schema::ResizeMethod_UNKNOWN;
}
auto size_node = tf_node_map.at(tf_op.input(1));
if (size_node == nullptr) {

@@ -114,9 +114,7 @@ CNodePtr LayerNormFusion::CreateLayerNormNode(const FuncGraphPtr &func_graph, co
MS_EXCEPTION_IF_NULL(func_graph);
auto layer_norm_primitive = std::make_unique<schema::PrimitiveT>();
std::unique_ptr<schema::LayerNormT> attr = std::make_unique<schema::LayerNormT>();
attr->normalizedShape = shape;
attr->epsilon = epsilon;
attr->elementwiseAffine = true;
layer_norm_primitive->value.type = schema::PrimitiveType_LayerNorm;
layer_norm_primitive->value.value = attr.release();
auto layer_norm_cvalue = lite::PrimitiveC::Create(layer_norm_primitive.release());