!11407 [MSLITE][Develop] rewrite layer_norm and instance_norm converter and runtime

From: @yangruoqi713
This commit is contained in:
mindspore-ci-bot 2021-01-22 09:40:23 +08:00 committed by Gitee
commit 0836a0a72a
25 changed files with 466 additions and 329 deletions

View File

@ -0,0 +1,84 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp32/instance_norm_fp32.h"
#include <math.h>
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"
int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data,
const InstanceNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL) {
int channel_step = UP_DIV(param->channel_, param->op_parameter_.thread_num_);
int channel_begin = task_id * channel_step;
int channel_end = MSMIN(channel_begin + channel_step, param->channel_);
for (int b = 0; b < param->batch_; b++) {
const float *src_b = src_data + b * param->channel_ * param->inner_size_;
float *dst_b = dst_data + b * param->channel_ * param->inner_size_;
for (int c = channel_begin; c < channel_end; c++) {
const float *src = src_b + c * param->inner_size_;
float *dst = dst_b + c * param->inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
int index = 0;
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t squarev = vmulq_f32(srcv, srcv);
sum = vaddq_f32(sum, srcv);
square_sum = vaddq_f32(square_sum, squarev);
mean = sum[0] + sum[1] + sum[2] + sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
for (; index < param->inner_size_; index++) {
mean += src[index];
square_mean += src[index] * src[index];
mean /= (float)param->inner_size_;
square_mean /= (float)param->inner_size_;
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
index = 0;
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t outv = vsubq_f32(srcv, meanv);
outv = vmulq_f32(outv, denov);
float32x4_t gammav = vdupq_n_f32(gamma_data[c]);
float32x4_t betav = vdupq_n_f32(beta_data[c]);
outv = vmulq_f32(outv, gammav);
outv = vaddq_f32(outv, betav);
vst1q_f32(dst + index, outv);
for (; index < param->inner_size_; index++) {
dst[index] = (src[index] - mean) * deno;
dst[index] = dst[index] * gamma_data[c] + beta_data[c];
return NNACL_OK;

View File

@ -1,5 +1,5 @@
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2021 Huawei Technologies Co., Ltd
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,14 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
#include "src/ops/arithmetic.h"
#include "nnacl/op_base.h"
#include "nnacl/instance_norm_parameter.h"
namespace mindspore {
namespace lite {
OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive);
} // namespace lite
} // namespace mindspore
#ifdef __cplusplus
extern "C" {
int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data,
const InstanceNormParameter *param, size_t task_id);
#ifdef __cplusplus

View File

@ -18,100 +18,81 @@
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"
int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data,
const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data,
size_t task_id, size_t thread_num) {
if (src_data == NULL || dst_data == NULL) {
/**
 * Computes the mean and the mean of squares of `num` consecutive floats.
 *
 * @param src          input values
 * @param num          number of values to read from src
 * @param mean         out: sum(src[i]) / num
 * @param square_mean  out: sum(src[i] * src[i]) / num
 *
 * Both outputs are reset here, so the result does not depend on what the
 * caller stored in them beforehand. For num <= 0 both outputs are set to 0
 * instead of dividing by zero.
 */
void LayerNormMeanAndSquare(const float *src, int num, float *mean, float *square_mean) {
  /* The scalar loop below accumulates with '+=', so start from a known state. */
  *mean = 0.0f;
  *square_mean = 0.0f;
  if (num <= 0) {
    return;
  }

  int index = 0;
#ifdef ENABLE_NEON
  /* NOTE(review): ENABLE_NEON is assumed to be the project macro under which
   * <arm_neon.h> and C4NUM are made available - confirm against nnacl/op_base.h. */
  float32x4_t sum = vdupq_n_f32(0);
  float32x4_t square_sum = vdupq_n_f32(0);
  /* The strict '<' leaves the final 1..C4NUM elements to the scalar tail. */
  for (; index < num - C4NUM; index += C4NUM) {
    float32x4_t srcv = vld1q_f32(src + index);
    float32x4_t squarev = vmulq_f32(srcv, srcv);
    sum = vaddq_f32(sum, srcv);
    square_sum = vaddq_f32(square_sum, squarev);
  }
  /* Horizontal add of the four lanes. */
  *mean = sum[0] + sum[1] + sum[2] + sum[3];
  *square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
  for (; index < num; index++) {
    *mean += src[index];
    *square_mean += src[index] * src[index];
  }

  *mean /= (float)num;
  *square_mean /= (float)num;
}
/**
 * Normalises `num` values and applies an element-wise scale and shift:
 *   dst[i] = (src[i] - mean) * deno * gamma_data[i] + beta_data[i]
 *
 * @param dst         output, `num` floats
 * @param src         input, `num` floats
 * @param gamma_data  element-wise scale, read at the same index as src
 * @param beta_data   element-wise shift, read at the same index as src
 * @param num         number of elements to process
 * @param mean        value subtracted from every input element
 * @param deno        multiplier applied after the subtraction
 */
void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data, const float *beta_data, int num,
                           const float mean, const float deno) {
  int index = 0;
#ifdef ENABLE_NEON
  /* NOTE(review): ENABLE_NEON is assumed to be the project macro under which
   * <arm_neon.h> and C4NUM are made available - confirm against nnacl/op_base.h. */
  float32x4_t meanv = vdupq_n_f32(mean);
  float32x4_t denov = vdupq_n_f32(deno);
  /* The strict '<' leaves the final 1..C4NUM elements to the scalar tail. */
  for (; index < num - C4NUM; index += C4NUM) {
    float32x4_t srcv = vld1q_f32(src + index);
    float32x4_t outv = vsubq_f32(srcv, meanv);
    outv = vmulq_f32(outv, denov);
    float32x4_t gammav = vld1q_f32(gamma_data + index);
    float32x4_t betav = vld1q_f32(beta_data + index);
    outv = vmulq_f32(outv, gammav);
    outv = vaddq_f32(outv, betav);
    vst1q_f32(dst + index, outv);
  }
#endif
  /* Scalar path: whole range without NEON, remaining tail with NEON. */
  for (; index < num; index++) {
    dst[index] = (src[index] - mean) * (deno);
    dst[index] = dst[index] * gamma_data[index] + beta_data[index];
  }
}
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data,
LayerNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
for (size_t j = task_id; j < outer_size; j += thread_num) {
const float *src = src_data + j * inner_size;
float *dst = dst_data + j * inner_size;
int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_);
int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_);
for (int i = task_id * step; i < thread_end; i++) {
const float *src_norm = src_data + i * param->norm_inner_size_;
float *dst_norm = dst_data + i * param->norm_inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
LayerNormMeanAndSquare(src_norm, param->norm_inner_size_, &mean, &square_mean);
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
int index = 0;
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t squarev1 = vmulq_f32(srcv1, srcv1);
float32x4_t squarev2 = vmulq_f32(srcv2, srcv2);
sum = vaddq_f32(sum, srcv1);
sum = vaddq_f32(sum, srcv2);
square_sum = vaddq_f32(square_sum, squarev1);
square_sum = vaddq_f32(square_sum, squarev2);
mean = sum[0] + sum[1] + sum[2] + sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
for (; index < inner_size; index++) {
mean += src[index];
square_mean += src[index] * src[index];
mean /= (float)inner_size;
square_mean /= (float)inner_size;
const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);
index = 0;
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
if (elementwise_mode != 0) {
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t outv1 = vsubq_f32(srcv1, meanv);
float32x4_t outv2 = vsubq_f32(srcv2, meanv);
outv1 = vmulq_f32(outv1, denov);
outv2 = vmulq_f32(outv2, denov);
if (elementwise_mode == 1) {
float32x4_t gammav1 = vdupq_n_f32(gamma_data[j]);
float32x4_t betav1 = vdupq_n_f32(beta_data[j]);
outv1 = vmulq_f32(outv1, gammav1);
outv2 = vmulq_f32(outv2, gammav1);
outv1 = vaddq_f32(outv1, betav1);
outv2 = vaddq_f32(outv2, betav1);
} else {
float32x4_t gammav1 = vld1q_f32(gamma_data + index);
float32x4_t gammav2 = vld1q_f32(gamma_data + index + 4);
float32x4_t betav1 = vld1q_f32(beta_data + index);
float32x4_t betav2 = vld1q_f32(beta_data + index + 4);
outv1 = vmulq_f32(outv1, gammav1);
outv2 = vmulq_f32(outv2, gammav2);
outv1 = vaddq_f32(outv1, betav1);
outv2 = vaddq_f32(outv2, betav2);
vst1q_f32(dst + index, outv1);
vst1q_f32(dst + index + 4, outv2);
if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const float *src_param = src_norm + x * param->params_inner_size_;
float *dst_param = dst_norm + x * param->params_inner_size_;
LayerNormGammaAndBeta(dst_param, src_param, gamma_data, beta_data, param->params_inner_size_, mean, deno);
} else {
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t outv1 = vsubq_f32(srcv1, meanv);
float32x4_t outv2 = vsubq_f32(srcv2, meanv);
outv1 = vmulq_f32(outv1, denov);
outv2 = vmulq_f32(outv2, denov);
vst1q_f32(dst + index, outv1);
vst1q_f32(dst + index + 4, outv2);
for (; index < inner_size; index++) {
dst[index] = (src[index] - mean) * deno;
if (elementwise_mode == 1) {
dst[index] = dst[index] * gamma_data[j] + beta_data[j];
} else if (elementwise_mode == 2) {
dst[index] = dst[index] * gamma_data[index] + beta_data[index];
int x = i / param->norm_outer_size_;
const float *src_param = src_norm + x * param->params_inner_size_;
float *dst_param = dst_norm + x * param->params_inner_size_;
const float *gamma = gamma_data + x * param->params_inner_size_;
const float *beta = beta_data + x * param->params_inner_size_;
LayerNormGammaAndBeta(dst_param, src_param, gamma, beta, param->norm_inner_size_, mean, deno);
return NNACL_OK;

View File

@ -23,9 +23,8 @@
extern "C" {
int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data,
const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data,
size_t task_id, size_t thread_num);
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data,
LayerNormParameter *param, size_t task_id);
#ifdef __cplusplus

View File

@ -23,9 +23,10 @@ typedef struct InstanceNormParameter {
// Primitive parameter
OpParameter op_parameter_;
float epsilon_;
float momentum_;
// shape correlative
int batch_;
int channel_;
int inner_size_;
} InstanceNormParameter;

View File

@ -16,45 +16,58 @@
#include "nnacl/int8/layer_norm_int8.h"
/**
 * Int8 counterpart of the normalise/scale/shift step.
 *
 * Every input byte is dequantized with the input zero point and scale,
 * normalised with (x - mean) * deno, scaled and shifted by gamma_data[k] and
 * beta_data[k], then requantized with the output scale and zero point and
 * saturated to the int8 range [-128, 127].
 *
 * @param dst         output, `num` int8 values
 * @param src         input, `num` int8 values
 * @param gamma_data  float scale, read at the same index as src
 * @param beta_data   float shift, read at the same index as src
 * @param quant       input/output zero points and scales
 * @param num         number of elements to process
 * @param mean        value subtracted from every dequantized element
 * @param deno        multiplier applied after the subtraction
 */
void LayerNormGammaAndBetaInt8(int8_t *dst, const int8_t *src, const float *gamma_data, const float *beta_data,
                               LayerNormQuantArg *quant, int num, const float mean, const float deno) {
  int k = 0;
  while (k < num) {
    /* Dequantize. */
    const float real_in = (src[k] - quant->in_zp_) * quant->in_scale_;
    /* Normalise, then scale and shift. */
    float real_out = (real_in - mean) * deno;
    real_out = real_out * gamma_data[k] + beta_data[k];
    /* Requantize; the '* 1.0' keeps the division and rounding in double precision. */
    int32_t quantized = (int32_t)round(real_out * 1.0 / quant->out_scale_ + quant->out_zp_);
    /* Saturate to the int8 range. */
    if (quantized > 127) {
      quantized = 127;
    }
    if (quantized < -128) {
      quantized = -128;
    }
    dst[k] = (int8_t)quantized;
    k++;
  }
}
/*
 * origin : (x - mean) / sqrt(variance + epsilon) * gamma + beta
 * quant  : (x - mean) / sqrt(mean(x * x) - mean * mean + epsilon) * gamma + beta
 *          where x is the dequantized input, (q - in_zp) * in_scale
 */
int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data,
enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant,
float epsilon) {
if (src_data == NULL || dst_data == NULL) {
LayerNormParameter *param, LayerNormQuantArg *quant, int task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) {
for (int out_index = 0; out_index < outer_size; out_index++) {
const int8_t *src = src_data + out_index * inner_size;
int8_t *dst = dst_data + out_index * inner_size;
int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_);
int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_);
for (int i = task_id * step; i < thread_end; i++) {
const int8_t *src_norm = src_data + i * param->norm_inner_size_;
int8_t *dst_norm = dst_data + i * param->norm_inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
for (int i = 0; i < inner_size; i++) {
float float_src = (src[i] - quant->in_zp_) * quant->in_scale_;
for (int j = 0; j < param->norm_inner_size_; j++) {
float float_src = (src_norm[j] - quant->in_zp_) * quant->in_scale_;
mean += float_src;
square_mean += float_src * float_src;
mean /= (float)inner_size;
square_mean /= (float)inner_size;
const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);
for (int i = 0; i < inner_size; i++) {
float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_;
float fp32_dst = (fp32_src - mean) * deno;
if (elementwise_mode == 1) {
fp32_dst = fp32_dst * gamma_data[out_index] + beta_data[out_index];
} else if (elementwise_mode == 2) {
fp32_dst = fp32_dst * gamma_data[i] + beta_data[i];
mean /= (float)param->norm_inner_size_;
square_mean /= (float)param->norm_inner_size_;
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const int8_t *src_param = src_norm + x * param->params_inner_size_;
int8_t *dst_param = dst_norm + x * param->params_inner_size_;
LayerNormGammaAndBetaInt8(dst_param, src_param, gamma_data, beta_data, quant, param->norm_inner_size_, mean,
int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_);
dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128);
} else {
int x = i / param->norm_outer_size_;
const int8_t *src_param = src_norm + x * param->params_inner_size_;
int8_t *dst_param = dst_norm + x * param->params_inner_size_;
const float *gamma = gamma_data + x * param->params_inner_size_;
const float *beta = beta_data + x * param->params_inner_size_;
LayerNormGammaAndBetaInt8(dst_param, src_param, gamma, beta, quant, param->norm_inner_size_, mean, deno);
return NNACL_OK;

View File

@ -25,8 +25,7 @@ extern "C" {
int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data,
enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant,
float epsilon);
LayerNormParameter *param, LayerNormQuantArg *quant, int task_id);
#ifdef __cplusplus

View File

@ -24,10 +24,15 @@ typedef struct LayerNormParameter {
// Primitive parameter
OpParameter op_parameter_;
float epsilon_;
enum ElementwiseMode elementwise_mode_;
int begin_norm_axis_;
int begin_params_axis_;
// shape correlative
int normalized_shape_[8];
int norm_inner_size_;
int norm_outer_size_;
int params_inner_size_;
int params_outer_size_;
int normalized_dims_;
int normalized_shape_[8];
// other parameter
int thread_count_;
int thread_outsize_;

View File

@ -17,7 +17,7 @@
namespace mindspore.schema;
enum ResizeMethod: byte {
UNKNOW = -1,
@ -80,7 +80,7 @@ enum ActivationType : byte {
SIGN = 17,
SWISH = 18,
enum ActivationGradType : byte {
@ -99,7 +99,7 @@ enum ActivationGradType : byte {
LINEAR = 15,
enum ReduceType : byte {
@ -109,7 +109,7 @@ enum ReduceType : byte {
enum PoolMode : byte {
@ -121,7 +121,7 @@ enum EltwiseMode : byte {
PROD = 0,
SUM = 1,
enum PadMode : byte {
@ -1144,9 +1144,9 @@ table Identity {
table LayerNorm {
normalizedShape : [int];
begin_norm_axis : int;
begin_params_axis : int;
epsilon : float = 0.00001;
elementwiseAffine : bool;
table While {

View File

@ -22,19 +22,14 @@
namespace mindspore {
namespace lite {
std::vector<int> LayerNorm::GetNormalizedShape() const {
return this->primitive_->value.AsLayerNorm()->normalizedShape;
float LayerNorm::GetEpsilon() const { return this->primitive_->value.AsLayerNorm()->epsilon; }
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value.AsLayerNorm()->elementwiseAffine; }
int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value.AsLayerNorm()->begin_norm_axis; }
int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value.AsLayerNorm()->begin_params_axis; }
void LayerNorm::SetNormalizedShape(const std::vector<int> &normalizedShape) {
this->primitive_->value.AsLayerNorm()->normalizedShape = normalizedShape;
void LayerNorm::SetEpsilon(float epsilon) { this->primitive_->value.AsLayerNorm()->epsilon = epsilon; }
void LayerNorm::SetElementwiseAffine(bool elementwiseAffine) {
this->primitive_->value.AsLayerNorm()->elementwiseAffine = elementwiseAffine;
void LayerNorm::SetBeginNormAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_norm_axis = axis; }
void LayerNorm::SetBeginParamsAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_params_axis = axis; }
int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@ -60,12 +55,17 @@ int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &
} else {
layer_norm_attr->epsilon = 1e-7;
value_attr = prim.GetAttr("normalized_shape");
if (value_attr != nullptr) {
layer_norm_attr->normalizedShape = CastToInt(value_attr);
auto norm_axis_attr = prim.GetAttr("begin_norm_axis");
if (norm_axis_attr != nullptr) {
layer_norm_attr->begin_norm_axis = GetValue<float>(norm_axis_attr);
} else {
layer_norm_attr->begin_norm_axis = -1;
if (inputs.size() == 3) {
layer_norm_attr->elementwiseAffine = true;
auto params_axis_attr = prim.GetAttr("begin_params_axis");
if (params_axis_attr != nullptr) {
layer_norm_attr->begin_params_axis = GetValue<float>(params_axis_attr);
} else {
layer_norm_attr->begin_params_axis = -1;
this->primitive_->value.value = layer_norm_attr;
@ -81,28 +81,20 @@ int LayerNorm::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffe
return RET_ERROR;
std::vector<int32_t> normalizedShape;
if (attr->normalizedShape() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->normalizedShape()->size()); i++) {
auto val_offset = schema::CreateLayerNormDirect(*fbb, &normalizedShape, attr->epsilon(), attr->elementwiseAffine());
auto val_offset = schema::CreateLayerNorm(*fbb, attr->epsilon(), attr->begin_norm_axis(), attr->begin_params_axis());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_LayerNorm, val_offset.o);
return RET_OK;
std::vector<int> LayerNorm::GetNormalizedShape() const {
auto fb_vector = this->primitive_->value_as_LayerNorm()->normalizedShape();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
float LayerNorm::GetEpsilon() const { return this->primitive_->value_as_LayerNorm()->epsilon(); }
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value_as_LayerNorm()->elementwiseAffine(); }
int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value_as_LayerNorm()->begin_norm_axis(); }
int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value_as_LayerNorm()->begin_params_axis(); }
PrimitiveC *LayerNormCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<LayerNorm>(primitive);
Registry LayerNormRegistry(schema::PrimitiveType_LayerNorm, LayerNormCreator);
int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {
if (outputs_.size() != kSingleNum || (inputs_.size() != kSingleNum && inputs_.size() != kTripleNum)) {
@ -116,41 +108,13 @@ int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite:
if (GetElementwiseAffine() && inputs_.size() != kTripleNum) {
MS_LOG(INFO) << "input tensor amount error";
if (!GetElementwiseAffine() && inputs_.size() != kSingleNum) {
MS_LOG(INFO) << "input tensor amount error";
if (!infer_flag()) {
auto input_shape = input->shape();
normlized_shape_ = GetNormalizedShape();
elementwise_mode_ = GetElementwiseAffine() ? 2 : 0;
if (normlized_shape_.size() > input_shape.size()) {
MS_LOG(INFO) << "normalized_shape attr invalid";
for (size_t i = GetBeginNormAxis(); i < input_shape.size(); i++) {
if (normlized_shape_.empty()) {
// instance norm -> layernorm only for nchw
if (input->format() == schema::Format_NCHW) {
normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 2, input_shape.end());
elementwise_mode_ = 1;
} else {
normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 1, input_shape.end());
size_t first_index = input_shape.size() - normlized_shape_.size();
for (size_t i = first_index; i < input_shape.size(); ++i) {
if (input_shape.at(i) != normlized_shape_.at(i - first_index)) {
MS_LOG(INFO) << "normalized_shape attr invalid";
return RET_OK;

View File

@ -32,23 +32,21 @@ class LayerNorm : public PrimitiveC {
MS_DECLARE_PARENT(LayerNorm, PrimitiveC);
explicit LayerNorm(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
void SetNormalizedShape(const std::vector<int> &normalizedShape);
void SetEpsilon(float epsilon);
void SetElementwiseAffine(bool elementwiseAffine);
void SetBeginNormAxis(int axis);
void SetBeginParamsAxis(int axis);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
std::vector<int> GetNormalizedShape() const;
float GetEpsilon() const;
bool GetElementwiseAffine() const;
std::vector<int> normlized_shape() const { return normlized_shape_; }
int elementwise_mode() const { return elementwise_mode_; }
int GetBeginNormAxis() const;
int GetBeginParamsAxis() const;
std::vector<int> GetNormlizedShape() const { return normlized_shape_; }
std::vector<int> normlized_shape_;
int elementwise_mode_ = 0;
} // namespace lite
} // namespace mindspore

View File

@ -14,9 +14,7 @@
* limitations under the License.
#include "src/ops/populate/layer_norm_populate.h"
#include "nnacl/layer_norm_parameter.h"
#include <cstdint>
#include "src/ops/layer_norm.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"
@ -32,20 +30,15 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi
memset(layer_norm_parameter, 0, sizeof(LayerNormParameter));
layer_norm_parameter->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::LayerNorm *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
auto normalized_shape = param->normlized_shape();
auto normalized_shape = param->GetNormlizedShape();
layer_norm_parameter->normalized_dims_ = normalized_shape.size();
if (normalized_shape.size() > SIZE_MAX / sizeof(int)) {
MS_LOG(ERROR) << "normalized_shape size too big";
return nullptr;
MS_ASSERT(normalized_shape.size() < 8);
for (size_t i = 0; i < normalized_shape.size(); i++) {
layer_norm_parameter->normalized_shape_[i] = normalized_shape[i];
layer_norm_parameter->epsilon_ = param->GetEpsilon();
layer_norm_parameter->elementwise_mode_ = static_cast<ElementwiseMode>(param->elementwise_mode());
layer_norm_parameter->begin_norm_axis_ = param->GetBeginNormAxis();
layer_norm_parameter->begin_params_axis_ = param->GetBeginParamsAxis();
return reinterpret_cast<OpParameter *>(layer_norm_parameter);

View File

@ -0,0 +1,78 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp32/instance_norm_fp32.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp32/instance_norm_fp32.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_InstanceNorm;
namespace mindspore::kernel {
// Kernel initialisation hook.
// When InferShapeDone() reports false, nothing is configured yet and RET_OK is
// returned; otherwise the work is delegated to ReSize() and its status returned.
int InstanceNormCPUKernel::Init() {
  return InferShapeDone() ? ReSize() : RET_OK;
}
int InstanceNormCPUKernel::ReSize() {
param_->op_parameter_.thread_num_ = context_->thread_num_;
auto shape = in_tensors_.front()->shape();
param_->batch_ = shape[0];
param_->inner_size_ = shape[2] * shape[3];
param_->channel_ = shape[1];
return RET_OK;
int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
int ret = InstanceNorm(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]";
return ret;
return RET_OK;
int InstanceNormRun(void *cdata, int task_id) {
auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
auto ret = kernel->DoInstanceNorm(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
return RET_ERROR;
return RET_OK;
int InstanceNormCPUKernel::Run() {
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
return ret;
return RET_OK;
// Registers InstanceNormCPUKernel (via LiteKernelCreator) for the CPU arch,
// float32 data type and the InstanceNorm primitive.
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, LiteKernelCreator<InstanceNormCPUKernel>)
} // namespace mindspore::kernel

View File

@ -0,0 +1,50 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/instance_norm_parameter.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class InstanceNormCPUKernel : public LiteKernel {
InstanceNormCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
~InstanceNormCPUKernel() override{};
int Init() override;
int ReSize() override;
int Run() override;
int DoInstanceNorm(int task_id);
InstanceNormParameter *param_ = nullptr;
float *src_data_ = nullptr;
float *dst_data_ = nullptr;
float *gamma_data_ = nullptr;
float *beta_data_ = nullptr;
} // namespace mindspore::kernel

View File

@ -18,7 +18,6 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/ops/populate/layer_norm_populate.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
@ -35,29 +34,37 @@ int LayerNormCPUKernel::Init() {
int LayerNormCPUKernel::ReSize() {
if (op_parameter_ != nullptr) {
op_parameter_ = nullptr;
op_parameter_ = PopulateLayerNormParameter(primitive_);
op_parameter_->thread_num_ = context_->thread_num_;
param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_);
param_->begin_norm_axis_ = -1;
param_->begin_params_axis_ = -1;
auto shape = in_tensors_.front()->shape();
outer_size_ = 1;
inner_size_ = 1;
for (size_t i = 0; i < shape.size(); ++i) {
if (i + param_->normalized_dims_ < shape.size()) {
outer_size_ *= shape.at(i);
} else {
inner_size_ *= shape.at(i);
param_->begin_norm_axis_ =
param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
param_->begin_params_axis_ =
param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();
param_->norm_outer_size_ = 1;
for (int i = 0; i < param_->begin_norm_axis_; ++i) {
param_->norm_outer_size_ *= shape.at(i);
param_->norm_inner_size_ = 1;
for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
param_->norm_inner_size_ *= shape.at(i);
param_->params_outer_size_ = 1;
for (int i = 0; i < param_->begin_params_axis_; ++i) {
param_->params_outer_size_ *= shape.at(i);
param_->params_inner_size_ = 1;
for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
param_->params_inner_size_ *= shape.at(i);
param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_);
return RET_OK;
int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
int ret = LayerNorm(outer_size_, inner_size_, src_data_, gamma_data_, beta_data_, param_->elementwise_mode_,
param_->epsilon_, dst_data_, thread_id, op_parameter_->thread_num_);
int ret = LayerNorm(src_data_, gamma_data_, beta_data_, dst_data_, param_, thread_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoLayerNorm error error_code[" << ret << "]";
return ret;
@ -77,10 +84,8 @@ int LayerNormRun(void *cdata, int task_id) {
int LayerNormCPUKernel::Run() {
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
if (param_->elementwise_mode_ != 0) {
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {

View File

@ -40,8 +40,6 @@ class LayerNormCPUKernel : public LiteKernel {
LayerNormParameter *param_ = nullptr;
int outer_size_;
int inner_size_;
float *src_data_ = nullptr;
float *dst_data_ = nullptr;
float *gamma_data_ = nullptr;

View File

@ -15,7 +15,6 @@
#include "src/runtime/kernel/arm/int8/layer_norm_int8.h"
#include "src/runtime/runtime_api.h"
#include "src/ops/populate/layer_norm_populate.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
@ -25,15 +24,14 @@ using mindspore::schema::PrimitiveType_LayerNorm;
namespace mindspore::kernel {
LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() {
if (param_->elementwise_mode_ != 0 && gamma_ptr_ != nullptr) {
if (gamma_ptr_ != nullptr) {
gamma_ptr_ = nullptr;
if (param_->elementwise_mode_ != 0 && beta_ptr_ != nullptr) {
if (beta_ptr_ != nullptr) {
beta_ptr_ = nullptr;
// Caches quantization parameters and builds float copies of the gamma and beta inputs.
// Returns RET_ERROR when either malloc fails, RET_OK otherwise.
// NOTE(review): this text is a rendered diff. Part of the body is elided at the hunk
// marker below, and the gamma/beta conversion appears twice (the first copy follows the
// elementwise_mode_ check, the second does not), presumably pre- and post-change copies.
int LayerNormInt8CPUKernel::SetQuantArgs() {
@ -45,33 +43,31 @@ int LayerNormInt8CPUKernel::SetQuantArgs() {
// Output quantization: zero point and scale come from the output tensor's first quant param.
quant_param_.out_zp_ = output->quant_params().front().zeroPoint;
quant_param_.out_scale_ = output->quant_params().front().scale;
// Variant that only converts gamma/beta when elementwise_mode_ is non-zero.
if (param_->elementwise_mode_ != 0) {
// gamma is input tensor 1, beta is input tensor 2.
lite::Tensor *gamma_tensor = in_tensors_.at(1);
lite::Tensor *beta_tensor = in_tensors_.at(2);
lite::Tensor *gamma_tensor = in_tensors_.at(1);
lite::Tensor *beta_tensor = in_tensors_.at(2);
// gamma: read as int8 and converted to float as (value - zero_point) * scale.
double gamma_scale = gamma_tensor->quant_params().front().scale;
int gamma_zp = gamma_tensor->quant_params().front().zeroPoint;
gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float)));
if (gamma_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc gamma_ptr_ failed";
return RET_ERROR;
int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c());
for (int i = 0; i < gamma_tensor->ElementsNum(); i++) {
gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale;
// Second copy of the gamma conversion (other side of the diff).
double gamma_scale = gamma_tensor->quant_params().front().scale;
int gamma_zp = gamma_tensor->quant_params().front().zeroPoint;
gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float)));
if (gamma_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc gamma_ptr_ failed";
return RET_ERROR;
int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c());
for (int i = 0; i < gamma_tensor->ElementsNum(); i++) {
gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale;
// beta: read as int32 and scaled by in_scale_ * gamma_scale (no zero point is applied).
beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float)));
if (beta_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc beta_ptr_ failed";
// NOTE(review): gamma_ptr_ is nulled on this failure path; no release of the gamma
// buffer is visible in this view. Confirm the full file frees it first.
gamma_ptr_ = nullptr;
return RET_ERROR;
int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c());
for (int i = 0; i < beta_tensor->ElementsNum(); i++) {
beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale;
// Second copy of the beta conversion (other side of the diff).
beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float)));
if (beta_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc beta_ptr_ failed";
gamma_ptr_ = nullptr;
return RET_ERROR;
int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c());
for (int i = 0; i < beta_tensor->ElementsNum(); i++) {
beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale;
return RET_OK;
@ -86,30 +82,37 @@ int LayerNormInt8CPUKernel::Init() {
// Rebuilds the LayerNorm parameter from primitive_ and recomputes the size fields
// derived from the first input's shape.
// Returns RET_NULL_PTR when PopulateLayerNormParameter yields nullptr, RET_OK otherwise.
// NOTE(review): this text is a rendered diff; two variants of the size computation are
// interleaved below (outer_size_/inner_size_ with thread_count_/thread_outsize_, and the
// begin_*_axis_ based norm_*/params_* sizes). Confirm which lines exist in the real file.
int LayerNormInt8CPUKernel::ReSize() {
// Drop the previous parameter object before repopulating.
// NOTE(review): no deallocation is visible in this view before the pointer is reset.
if (op_parameter_ != nullptr) {
op_parameter_ = nullptr;
op_parameter_ = PopulateLayerNormParameter(primitive_);
if (op_parameter_ == nullptr) {
MS_LOG(ERROR) << "op_parameter_ is nullptr!";
return RET_NULL_PTR;
op_parameter_->thread_num_ = context_->thread_num_;
param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_);
// Variant A: the trailing normalized_dims_ dimensions multiply into inner_size_,
// all leading dimensions multiply into outer_size_.
auto shape = in_tensors_.front()->shape();
outer_size_ = 1;
inner_size_ = 1;
for (size_t i = 0; i < shape.size(); ++i) {
if (i + param_->normalized_dims_ < shape.size()) {
outer_size_ *= shape.at(i);
} else {
inner_size_ *= shape.at(i);
// Variant B: both axes are set to -1 here, before the normalization below.
param_->begin_norm_axis_ = -1;
param_->begin_params_axis_ = -1;
// Variant A: number of tasks and the count of outer slices each task handles.
param_->thread_count_ = MSMIN(outer_size_, op_parameter_->thread_num_);
param_->thread_outsize_ = UP_DIV(outer_size_, param_->thread_count_);
auto shape = in_tensors_.front()->shape();
// Variant B: axes that are not strictly positive get shape.size() added.
// NOTE(review): with `> 0`, an axis value of 0 also takes the `+ shape.size()` branch.
// With the -1 assignments above this always yields shape.size() - 1; confirm whether
// `>= 0` is intended if the axes ever come from the model.
param_->begin_norm_axis_ =
param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
param_->begin_params_axis_ =
param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();
// Product of the dimensions before begin_norm_axis_.
param_->norm_outer_size_ = 1;
for (int i = 0; i < param_->begin_norm_axis_; ++i) {
param_->norm_outer_size_ *= shape.at(i);
// Product of the dimensions from begin_norm_axis_ to the end.
param_->norm_inner_size_ = 1;
for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
param_->norm_inner_size_ *= shape.at(i);
// Same split, taken at begin_params_axis_.
param_->params_outer_size_ = 1;
for (int i = 0; i < param_->begin_params_axis_; ++i) {
param_->params_outer_size_ *= shape.at(i);
param_->params_inner_size_ = 1;
for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
param_->params_inner_size_ *= shape.at(i);
// Cap the thread count at norm_outer_size_.
param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_);
return RET_OK;
// Per-task entry: forwards the source/destination buffers, the float gamma/beta arrays,
// the layer-norm and quantization parameters, and task_id to LayerNormInt8.
// Always returns RET_OK; any result from LayerNormInt8 is not inspected here.
int LayerNormInt8CPUKernel::DoExecute(int task_id) {
LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, &quant_param_, task_id);
return RET_OK;
@ -119,21 +122,6 @@ int LayerNormInt8Run(void *cdata, int task_id) {
return RET_OK;
// Per-task entry that slices the outer dimension across tasks before calling LayerNormInt8.
// NOTE(review): a second DoExecute definition appears earlier in this diff view;
// presumably this is the pre-change version. Confirm against the real file.
int LayerNormInt8CPUKernel::DoExecute(int task_id) {
// Outer slices remaining for this task, capped at thread_outsize_ per task.
int current_out_size = outer_size_ - task_id * param_->thread_outsize_;
current_out_size = MSMIN(current_out_size, param_->thread_outsize_);
// Nothing left for this task.
if (current_out_size <= 0) {
return RET_OK;
// Advance both buffers by the slices owned by earlier tasks (inner_size_ elements each).
const int8_t *thread_src = src_ptr_ + task_id * param_->thread_outsize_ * inner_size_;
int8_t *thread_dst = dst_ptr_ + task_id * param_->thread_outsize_ * inner_size_;
LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_mode_, current_out_size, inner_size_,
&quant_param_, param_->epsilon_);
return RET_OK;
int LayerNormInt8CPUKernel::Run() {
src_ptr_ = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData());
dst_ptr_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());

View File

@ -46,8 +46,6 @@ class LayerNormInt8CPUKernel : public LiteKernel {
LayerNormParameter *param_ = nullptr;
LayerNormQuantArg quant_param_;
int outer_size_ = 0;
int inner_size_ = 0;
int8_t *src_ptr_ = nullptr;
int8_t *dst_ptr_ = nullptr;
float *gamma_ptr_ = nullptr;

View File

@ -349,7 +349,7 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
case schema::ResizeMethod_UNKNOW:
case schema::ResizeMethod_UNKNOWN:
default: {
MS_LOG(ERROR) << "Resize unknown method " << method_;
ret = RET_ERROR;

View File

@ -33,24 +33,6 @@ namespace mindspore::kernel {
// Validates the tensor configuration of the OpenCL LayerNorm kernel; the visible checks
// log an error and return RET_ERROR on mismatch.
// NOTE(review): this text is a rendered diff and the hunk ends mid-function; the
// elementwise_mode_ checks are presumably the lines the change removed. Confirm.
int LayerNormOpenCLKernel::CheckSpecs() {
auto param = reinterpret_cast<LayerNormParameter *>(this->op_parameter_);
// ELEMENTWISE_PER_NUM: exactly three inputs are required, and normalized_dims_ must not
// exceed the rank of input 0.
if (param->elementwise_mode_ == ELEMENTWISE_PER_NUM) {
if (in_tensors_.size() != 3) {
MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl;
return RET_ERROR;
if (param->normalized_dims_ > in_tensors_.at(0)->shape().size()) {
MS_LOG(ERROR) << " invalid normalized_shape_ size" << param->normalized_dims_ << std::endl;
return RET_ERROR;
// ELEMENTWISE_NOT: exactly one input is required.
} else if (param->elementwise_mode_ == ELEMENTWISE_NOT) {
if (in_tensors_.size() != 1) {
MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl;
return RET_ERROR;
// Any other mode is rejected.
} else {
MS_LOG(ERROR) << "Unsupported elementwise_mode_" << param->elementwise_mode_;
return RET_ERROR;
// Input 0 must be 4-D and there must be exactly one output; otherwise an error is logged.
if (in_tensors_.at(0)->shape().size() != 4 || out_tensors_.size() != 1) {
MS_LOG(ERROR) << "UnSupported in_tensors_.shape.size: " << in_tensors_.at(0)->shape().size()
<< " out_tensors_.size(): " << out_tensors_.size();

View File

@ -24,7 +24,6 @@ namespace {
// PrimitiveType_Stack: src/ops/populate/stack_populate.cc
// Test helper: creates a LayerNormParameter for PrimitiveType_LayerNorm through
// test::CreateParameter and fills in epsilon_ and normalized_dims_.
// The loop visits at most normalized_dims_ entries of normalizedShape.
// NOTE(review): the loop body and the return statement are outside this diff view, and
// the elementwise_mode_ assignment is presumably the line the change removed. Confirm.
OpParameter *CreateParameter(float epsilon, int normalized_dims_, std::vector<int> normalizedShape) {
auto *param = test::CreateParameter<LayerNormParameter>(schema::PrimitiveType_LayerNorm);
param->elementwise_mode_ = ELEMENTWISE_PER_NUM;
param->epsilon_ = epsilon;
param->normalized_dims_ = normalized_dims_;
for (int i = 0; i < normalizedShape.size() && i < normalized_dims_; ++i) {

View File

@ -48,7 +48,6 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = {

View File

@ -22,7 +22,7 @@ namespace lite {
// Builds the lite primitive for an ONNX instance-norm node: allocates the attribute
// object, stores it in a schema primitive and wraps that with PrimitiveC::Create.
// Returns nullptr when an allocation check fails.
// NOTE(review): this text is a rendered diff; the attribute allocation and the
// primitive type assignment each appear twice (LayerNorm and InstanceNorm forms).
// Presumably the InstanceNorm lines are the post-change ones; confirm against the real
// file. Part of the body is elided at the hunk marker.
lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphProto &onnx_graph,
const onnx::NodeProto &onnx_node) {
MS_LOG(DEBUG) << "onnx InstanceNormParser";
// LayerNormT form of the attribute allocation.
auto attr = std::make_unique<schema::LayerNormT>();
// InstanceNormT form of the attribute allocation.
auto attr = std::make_unique<schema::InstanceNormT>();
if (attr == nullptr) {
MS_LOG(ERROR) << "new op failed";
return nullptr;
@ -39,8 +39,7 @@ lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphPr
MS_LOG(ERROR) << "new primitive failed";
return nullptr;
attr->elementwiseAffine = true;
// LayerNorm and InstanceNorm forms of the primitive type tag.
primitive->value.type = schema::PrimitiveType_LayerNorm;
primitive->value.type = schema::PrimitiveType_InstanceNorm;
// Ownership of the attribute, then of the primitive, is released to the callee.
primitive->value.value = attr.release();
return PrimitiveC::Create(primitive.release());

View File

@ -57,7 +57,7 @@ STATUS TFResizeParser::Parse(const tensorflow::NodeDef &tf_op,
} else if (tf_op.op() == "ResizeNearestNeighbor") {
attr->method = schema::ResizeMethod_NEAREST;
} else {
attr->method = schema::ResizeMethod_UNKNOW;
attr->method = schema::ResizeMethod_UNKNOWN;
auto size_node = tf_node_map.at(tf_op.input(1));
if (size_node == nullptr) {

View File

@ -114,9 +114,7 @@ CNodePtr LayerNormFusion::CreateLayerNormNode(const FuncGraphPtr &func_graph, co
auto layer_norm_primitive = std::make_unique<schema::PrimitiveT>();
std::unique_ptr<schema::LayerNormT> attr = std::make_unique<schema::LayerNormT>();
attr->normalizedShape = shape;
attr->epsilon = epsilon;
attr->elementwiseAffine = true;
layer_norm_primitive->value.type = schema::PrimitiveType_LayerNorm;
layer_norm_primitive->value.value = attr.release();
auto layer_norm_cvalue = lite::PrimitiveC::Create(layer_norm_primitive.release());