!11407 [MSLITE][Develop] rewrite layer_norm and instance_norm converter and runtime
From: @yangruoqi713 Reviewed-by: Signed-off-by:
commit 0836a0a72a
@@ -0,0 +1,84 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/instance_norm_fp32.h"
#include <math.h>
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"

int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data,
const InstanceNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL) {
return NNACL_NULL_PTR;
}
int channel_step = UP_DIV(param->channel_, param->op_parameter_.thread_num_);
int channel_begin = task_id * channel_step;
int channel_end = MSMIN(channel_begin + channel_step, param->channel_);

for (int b = 0; b < param->batch_; b++) {
const float *src_b = src_data + b * param->channel_ * param->inner_size_;
float *dst_b = dst_data + b * param->channel_ * param->inner_size_;
for (int c = channel_begin; c < channel_end; c++) {
const float *src = src_b + c * param->inner_size_;
float *dst = dst_b + c * param->inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;

int index = 0;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t squarev = vmulq_f32(srcv, srcv);
sum = vaddq_f32(sum, srcv);
square_sum = vaddq_f32(square_sum, squarev);
}
mean = sum[0] + sum[1] + sum[2] + sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < param->inner_size_; index++) {
mean += src[index];
square_mean += src[index] * src[index];
}

mean /= (float)param->inner_size_;
square_mean /= (float)param->inner_size_;
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);

index = 0;
#ifdef ENABLE_NEON
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t outv = vsubq_f32(srcv, meanv);
outv = vmulq_f32(outv, denov);

float32x4_t gammav = vdupq_n_f32(gamma_data[c]);
float32x4_t betav = vdupq_n_f32(beta_data[c]);
outv = vmulq_f32(outv, gammav);
outv = vaddq_f32(outv, betav);
vst1q_f32(dst + index, outv);
}
#endif
for (; index < param->inner_size_; index++) {
dst[index] = (src[index] - mean) * deno;
dst[index] = dst[index] * gamma_data[c] + beta_data[c];
}
}
}
return NNACL_OK;
}
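For context (not part of this commit), a minimal single-threaded driver for the new kernel could look like the sketch below. The InstanceNormParameter fields are the ones added in nnacl/instance_norm_parameter.h later in this diff; the helper name RunInstanceNormOnce and the NCHW shape assumption are illustrative only.

#include "nnacl/fp32/instance_norm_fp32.h"

/* Sketch: normalize one NCHW tensor of shape [batch, channel, h, w] on a single
   thread (task_id 0, thread_num_ 1); gamma and beta hold one value per channel. */
int RunInstanceNormOnce(const float *src, float *dst, const float *gamma, const float *beta,
                        int batch, int channel, int h, int w, float epsilon) {
  InstanceNormParameter param = {0};
  param.op_parameter_.thread_num_ = 1; /* every channel is handled by task 0 */
  param.batch_ = batch;
  param.channel_ = channel;
  param.inner_size_ = h * w; /* elements per (batch, channel) plane */
  param.epsilon_ = epsilon;
  return InstanceNorm(src, dst, gamma, beta, &param, 0);
}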
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

@@ -13,14 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_
#define MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_
#ifndef MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
#define MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_

#include "src/ops/arithmetic.h"
#include "nnacl/op_base.h"
#include "nnacl/instance_norm_parameter.h"

namespace mindspore {
namespace lite {
OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_
#ifdef __cplusplus
extern "C" {
#endif

int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data,
const InstanceNormParameter *param, size_t task_id);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_

@@ -18,100 +18,81 @@
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"

int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data,
const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data,
size_t task_id, size_t thread_num) {
if (src_data == NULL || dst_data == NULL) {
return NNACL_NULL_PTR;
void LayerNormMeanAndSquare(const float *src, int num, float *mean, float *square_mean) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < num - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t squarev = vmulq_f32(srcv, srcv);
sum = vaddq_f32(sum, srcv);
square_sum = vaddq_f32(square_sum, squarev);
}
if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) {
*mean = sum[0] + sum[1] + sum[2] + sum[3];
*square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < num; index++) {
*mean += src[index];
*square_mean += src[index] * src[index];
}

*mean /= (float)num;
*square_mean /= (float)num;
}

void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data, const float *beta_data, int num,
const float mean, const float deno) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
for (; index < num - C4NUM; index += C4NUM) {
float32x4_t srcv = vld1q_f32(src + index);
float32x4_t outv = vsubq_f32(srcv, meanv);
outv = vmulq_f32(outv, denov);
float32x4_t gammav = vld1q_f32(gamma_data + index);
float32x4_t betav = vld1q_f32(beta_data + index);
outv = vmulq_f32(outv, gammav);
outv = vaddq_f32(outv, betav);
vst1q_f32(dst + index, outv);
}
#endif
for (; index < num; index++) {
dst[index] = (src[index] - mean) * (deno);
dst[index] = dst[index] * gamma_data[index] + beta_data[index];
}
}

int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data,
LayerNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR;
}

for (size_t j = task_id; j < outer_size; j += thread_num) {
const float *src = src_data + j * inner_size;
float *dst = dst_data + j * inner_size;
int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_);
int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_);
for (int i = task_id * step; i < thread_end; i++) {
const float *src_norm = src_data + i * param->norm_inner_size_;
float *dst_norm = dst_data + i * param->norm_inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
LayerNormMeanAndSquare(src_norm, param->norm_inner_size_, &mean, &square_mean);
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);

int index = 0;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t squarev1 = vmulq_f32(srcv1, srcv1);
float32x4_t squarev2 = vmulq_f32(srcv2, srcv2);
sum = vaddq_f32(sum, srcv1);
sum = vaddq_f32(sum, srcv2);
square_sum = vaddq_f32(square_sum, squarev1);
square_sum = vaddq_f32(square_sum, squarev2);
}
mean = sum[0] + sum[1] + sum[2] + sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < inner_size; index++) {
mean += src[index];
square_mean += src[index] * src[index];
}

mean /= (float)inner_size;
square_mean /= (float)inner_size;
const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);

index = 0;
#ifdef ENABLE_NEON
float32x4_t meanv = vdupq_n_f32(mean);
float32x4_t denov = vdupq_n_f32(deno);
if (elementwise_mode != 0) {
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t outv1 = vsubq_f32(srcv1, meanv);
float32x4_t outv2 = vsubq_f32(srcv2, meanv);
outv1 = vmulq_f32(outv1, denov);
outv2 = vmulq_f32(outv2, denov);
if (elementwise_mode == 1) {
float32x4_t gammav1 = vdupq_n_f32(gamma_data[j]);
float32x4_t betav1 = vdupq_n_f32(beta_data[j]);
outv1 = vmulq_f32(outv1, gammav1);
outv2 = vmulq_f32(outv2, gammav1);
outv1 = vaddq_f32(outv1, betav1);
outv2 = vaddq_f32(outv2, betav1);
} else {
float32x4_t gammav1 = vld1q_f32(gamma_data + index);
float32x4_t gammav2 = vld1q_f32(gamma_data + index + 4);
float32x4_t betav1 = vld1q_f32(beta_data + index);
float32x4_t betav2 = vld1q_f32(beta_data + index + 4);
outv1 = vmulq_f32(outv1, gammav1);
outv2 = vmulq_f32(outv2, gammav2);
outv1 = vaddq_f32(outv1, betav1);
outv2 = vaddq_f32(outv2, betav2);
}
vst1q_f32(dst + index, outv1);
vst1q_f32(dst + index + 4, outv2);
if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const float *src_param = src_norm + x * param->params_inner_size_;
float *dst_param = dst_norm + x * param->params_inner_size_;
LayerNormGammaAndBeta(dst_param, src_param, gamma_data, beta_data, param->params_inner_size_, mean, deno);
}
} else {
for (; index < inner_size - C8NUM; index += C8NUM) {
float32x4_t srcv1 = vld1q_f32(src + index);
float32x4_t srcv2 = vld1q_f32(src + index + 4);
float32x4_t outv1 = vsubq_f32(srcv1, meanv);
float32x4_t outv2 = vsubq_f32(srcv2, meanv);
outv1 = vmulq_f32(outv1, denov);
outv2 = vmulq_f32(outv2, denov);
vst1q_f32(dst + index, outv1);
vst1q_f32(dst + index + 4, outv2);
}
}
#endif
for (; index < inner_size; index++) {
dst[index] = (src[index] - mean) * deno;
if (elementwise_mode == 1) {
dst[index] = dst[index] * gamma_data[j] + beta_data[j];
} else if (elementwise_mode == 2) {
dst[index] = dst[index] * gamma_data[index] + beta_data[index];
}
int x = i / param->norm_outer_size_;
const float *src_param = src_norm + x * param->params_inner_size_;
float *dst_param = dst_norm + x * param->params_inner_size_;
const float *gamma = gamma_data + x * param->params_inner_size_;
const float *beta = beta_data + x * param->params_inner_size_;
LayerNormGammaAndBeta(dst_param, src_param, gamma, beta, param->norm_inner_size_, mean, deno);
}
}
return NNACL_OK;
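For context (not part of this commit), the rewritten LayerNorm is driven by the norm/params size fields instead of a normalized-shape vector. A minimal single-threaded sketch, with the fields filled the same way LayerNormCPUKernel::ReSize() derives them later in this diff, is shown below; the (2, 4, 16) shape, the epsilon value, the helper name, and the nnacl/fp32/layer_norm_fp32.h include path are assumptions for illustration.

#include "nnacl/fp32/layer_norm_fp32.h"

/* Sketch: layer-normalize a (2, 4, 16) tensor over its last axis with one thread;
   gamma and beta each hold 16 values. */
int LayerNormLastAxisOnce(const float *src, const float *gamma, const float *beta, float *dst) {
  LayerNormParameter param = {0};
  param.op_parameter_.thread_num_ = 1;
  param.epsilon_ = 1e-5f;
  param.begin_norm_axis_ = 2;       /* normalize over the last dimension */
  param.begin_params_axis_ = 2;     /* gamma/beta cover the last dimension only */
  param.norm_outer_size_ = 2 * 4;   /* product of dims before begin_norm_axis_ */
  param.norm_inner_size_ = 16;      /* product of dims from begin_norm_axis_ on */
  param.params_outer_size_ = 2 * 4; /* product of dims before begin_params_axis_ */
  param.params_inner_size_ = 16;    /* product of dims from begin_params_axis_ on */
  return LayerNorm(src, gamma, beta, dst, &param, 0);
}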
@@ -23,9 +23,8 @@
extern "C" {
#endif

int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data,
const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data,
size_t task_id, size_t thread_num);
int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data,
LayerNormParameter *param, size_t task_id);
#ifdef __cplusplus
}
#endif

@@ -23,9 +23,10 @@ typedef struct InstanceNormParameter {
// Primitive parameter
OpParameter op_parameter_;
float epsilon_;
float momentum_;
// shape correlative
int batch_;
int channel_;
int inner_size_;
} InstanceNormParameter;

#endif // MINDSPORE_LITE_NNACL_INSTANCE_NORM_PARAMETER_H_

@@ -16,45 +16,58 @@

#include "nnacl/int8/layer_norm_int8.h"

void LayerNormGammaAndBetaInt8(int8_t *dst, const int8_t *src, const float *gamma_data, const float *beta_data,
LayerNormQuantArg *quant, int num, const float mean, const float deno) {
for (int i = 0; i < num; i++) {
float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_;
float fp32_dst = (fp32_src - mean) * deno;
fp32_dst = fp32_dst * gamma_data[i] + beta_data[i];
int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_);
dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128);
}
}

/*
* origin : (x-mean) / sqrt(variance + epsilon) * gamma + beta
* quant : (x-mean) / sqrt(sum(x * x) - mean * mean) * gamma + beta
*
* */
int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data,
enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant,
float epsilon) {
if (src_data == NULL || dst_data == NULL) {
LayerNormParameter *param, LayerNormQuantArg *quant, int task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR;
}

if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) {
return NNACL_NULL_PTR;
}

for (int out_index = 0; out_index < outer_size; out_index++) {
const int8_t *src = src_data + out_index * inner_size;
int8_t *dst = dst_data + out_index * inner_size;
int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_);
int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_);
for (int i = task_id * step; i < thread_end; i++) {
const int8_t *src_norm = src_data + i * param->norm_inner_size_;
int8_t *dst_norm = dst_data + i * param->norm_inner_size_;
float mean = 0.0f;
float square_mean = 0.0f;
for (int i = 0; i < inner_size; i++) {
float float_src = (src[i] - quant->in_zp_) * quant->in_scale_;
for (int j = 0; j < param->norm_inner_size_; j++) {
float float_src = (src_norm[j] - quant->in_zp_) * quant->in_scale_;
mean += float_src;
square_mean += float_src * float_src;
}
mean /= (float)inner_size;
square_mean /= (float)inner_size;
const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon);
for (int i = 0; i < inner_size; i++) {
float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_;
float fp32_dst = (fp32_src - mean) * deno;
if (elementwise_mode == 1) {
fp32_dst = fp32_dst * gamma_data[out_index] + beta_data[out_index];
} else if (elementwise_mode == 2) {
fp32_dst = fp32_dst * gamma_data[i] + beta_data[i];
mean /= (float)param->norm_inner_size_;
square_mean /= (float)param->norm_inner_size_;
const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);

if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const int8_t *src_param = src_norm + x * param->params_inner_size_;
int8_t *dst_param = dst_norm + x * param->params_inner_size_;
LayerNormGammaAndBetaInt8(dst_param, src_param, gamma_data, beta_data, quant, param->norm_inner_size_, mean,
deno);
}
int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_);
dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128);
} else {
int x = i / param->norm_outer_size_;
const int8_t *src_param = src_norm + x * param->params_inner_size_;
int8_t *dst_param = dst_norm + x * param->params_inner_size_;
const float *gamma = gamma_data + x * param->params_inner_size_;
const float *beta = beta_data + x * param->params_inner_size_;
LayerNormGammaAndBetaInt8(dst_param, src_param, gamma, beta, quant, param->norm_inner_size_, mean, deno);
}
}
return NNACL_OK;

@@ -25,8 +25,7 @@ extern "C" {
#endif

int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data,
enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant,
float epsilon);
LayerNormParameter *param, LayerNormQuantArg *quant, int task_id);

#ifdef __cplusplus
}

@@ -24,10 +24,15 @@ typedef struct LayerNormParameter {
// Primitive parameter
OpParameter op_parameter_;
float epsilon_;
enum ElementwiseMode elementwise_mode_;
int begin_norm_axis_;
int begin_params_axis_;
// shape correlative
int normalized_shape_[8];
int norm_inner_size_;
int norm_outer_size_;
int params_inner_size_;
int params_outer_size_;
int normalized_dims_;
int normalized_shape_[8];
// other parameter
int thread_count_;
int thread_outsize_;
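As a worked example of the new size fields (illustrative, not taken from the diff): for an input of shape (2, 6, 8) with begin_norm_axis_ = 1 and begin_params_axis_ = 2, the derivation used by LayerNormCPUKernel::ReSize() later in this commit gives norm_outer_size_ = 2, norm_inner_size_ = 48, params_outer_size_ = 12 and params_inner_size_ = 8; since norm_outer_size_ <= params_outer_size_, each normalized row of 48 values is processed as 48 / 8 = 6 slices, and the same gamma and beta of length 8 are applied to every slice.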
@@ -17,7 +17,7 @@
namespace mindspore.schema;

enum ResizeMethod: byte {
UNKNOW = -1,
UNKNOWN = -1,
LINEAR = 0,
NEAREST = 1,
CUBIC = 2

@@ -80,7 +80,7 @@ enum ActivationType : byte {
HARD_TANH = 16,
SIGN = 17,
SWISH = 18,
UNKNOW = 19
UNKNOWN = 19
}
enum ActivationGradType : byte {
NO_ACTIVATION = 0,

@@ -99,7 +99,7 @@ enum ActivationGradType : byte {
HSIGMOID = 13,
THRESHOLDRELU = 14,
LINEAR = 15,
UNKNOW = 16
UNKNOWN = 16
}
enum ReduceType : byte {
REDUCE_MAX = 0,

@@ -109,7 +109,7 @@ enum ReduceType : byte {
REDUCE_LOG_SUM_EXP = 4,
REDUCE_PROD = 5,
REDUCE_SUM = 6,
UNKNOW = 7
UNKNOWN = 7
}

enum PoolMode : byte {

@@ -121,7 +121,7 @@ enum EltwiseMode : byte {
PROD = 0,
SUM = 1,
MAXIMUM = 2,
UNKNOW = 3
UNKNOWN = 3
}

enum PadMode : byte {

@@ -1144,9 +1144,9 @@ table Identity {
}

table LayerNorm {
normalizedShape : [int];
begin_norm_axis : int;
begin_params_axis : int;
epsilon : float = 0.00001;
elementwiseAffine : bool;
}

table While {

@@ -22,19 +22,14 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
std::vector<int> LayerNorm::GetNormalizedShape() const {
return this->primitive_->value.AsLayerNorm()->normalizedShape;
}
float LayerNorm::GetEpsilon() const { return this->primitive_->value.AsLayerNorm()->epsilon; }
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value.AsLayerNorm()->elementwiseAffine; }
int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value.AsLayerNorm()->begin_norm_axis; }
int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value.AsLayerNorm()->begin_params_axis; }

void LayerNorm::SetNormalizedShape(const std::vector<int> &normalizedShape) {
this->primitive_->value.AsLayerNorm()->normalizedShape = normalizedShape;
}
void LayerNorm::SetEpsilon(float epsilon) { this->primitive_->value.AsLayerNorm()->epsilon = epsilon; }
void LayerNorm::SetElementwiseAffine(bool elementwiseAffine) {
this->primitive_->value.AsLayerNorm()->elementwiseAffine = elementwiseAffine;
}
void LayerNorm::SetBeginNormAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_norm_axis = axis; }
void LayerNorm::SetBeginParamsAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_params_axis = axis; }

int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;

@@ -60,12 +55,17 @@ int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &
} else {
layer_norm_attr->epsilon = 1e-7;
}
value_attr = prim.GetAttr("normalized_shape");
if (value_attr != nullptr) {
layer_norm_attr->normalizedShape = CastToInt(value_attr);
auto norm_axis_attr = prim.GetAttr("begin_norm_axis");
if (norm_axis_attr != nullptr) {
layer_norm_attr->begin_norm_axis = GetValue<float>(norm_axis_attr);
} else {
layer_norm_attr->begin_norm_axis = -1;
}
if (inputs.size() == 3) {
layer_norm_attr->elementwiseAffine = true;
auto params_axis_attr = prim.GetAttr("begin_params_axis");
if (params_axis_attr != nullptr) {
layer_norm_attr->begin_params_axis = GetValue<float>(params_axis_attr);
} else {
layer_norm_attr->begin_params_axis = -1;
}
this->primitive_->value.value = layer_norm_attr;
}

@@ -81,28 +81,20 @@ int LayerNorm::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffe
return RET_ERROR;
}

std::vector<int32_t> normalizedShape;
if (attr->normalizedShape() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->normalizedShape()->size()); i++) {
normalizedShape.push_back(attr->normalizedShape()->data()[i]);
}
}
auto val_offset = schema::CreateLayerNormDirect(*fbb, &normalizedShape, attr->epsilon(), attr->elementwiseAffine());
auto val_offset = schema::CreateLayerNorm(*fbb, attr->epsilon(), attr->begin_norm_axis(), attr->begin_params_axis());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_LayerNorm, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
std::vector<int> LayerNorm::GetNormalizedShape() const {
auto fb_vector = this->primitive_->value_as_LayerNorm()->normalizedShape();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}

float LayerNorm::GetEpsilon() const { return this->primitive_->value_as_LayerNorm()->epsilon(); }
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value_as_LayerNorm()->elementwiseAffine(); }
int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value_as_LayerNorm()->begin_norm_axis(); }
int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value_as_LayerNorm()->begin_params_axis(); }

PrimitiveC *LayerNormCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<LayerNorm>(primitive);
}
Registry LayerNormRegistry(schema::PrimitiveType_LayerNorm, LayerNormCreator);

#endif
int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {
if (outputs_.size() != kSingleNum || (inputs_.size() != kSingleNum && inputs_.size() != kTripleNum)) {

@@ -116,41 +108,13 @@ int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite:
output->set_format(input->format());
output->set_data_type(input->data_type());

if (GetElementwiseAffine() && inputs_.size() != kTripleNum) {
MS_LOG(INFO) << "input tensor amount error";
return RET_INPUT_TENSOR_ERROR;
}
if (!GetElementwiseAffine() && inputs_.size() != kSingleNum) {
MS_LOG(INFO) << "input tensor amount error";
return RET_INPUT_TENSOR_ERROR;
}
if (!infer_flag()) {
return RET_INFER_INVALID;
}
auto input_shape = input->shape();
normlized_shape_ = GetNormalizedShape();
elementwise_mode_ = GetElementwiseAffine() ? 2 : 0;
if (normlized_shape_.size() > input_shape.size()) {
MS_LOG(INFO) << "normalized_shape attr invalid";
return RET_PARAM_INVALID;
for (size_t i = GetBeginNormAxis(); i < input_shape.size(); i++) {
normlized_shape_.push_back(input_shape[i]);
}
if (normlized_shape_.empty()) {
// instance norm -> layernorm only for nchw
if (input->format() == schema::Format_NCHW) {
normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 2, input_shape.end());
elementwise_mode_ = 1;
} else {
normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 1, input_shape.end());
}
}
size_t first_index = input_shape.size() - normlized_shape_.size();
for (size_t i = first_index; i < input_shape.size(); ++i) {
if (input_shape.at(i) != normlized_shape_.at(i - first_index)) {
MS_LOG(INFO) << "normalized_shape attr invalid";
return RET_PARAM_INVALID;
}
}

output->set_shape(input_shape);
return RET_OK;
}

@@ -32,23 +32,21 @@ class LayerNorm : public PrimitiveC {
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(LayerNorm, PrimitiveC);
explicit LayerNorm(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
void SetNormalizedShape(const std::vector<int> &normalizedShape);
void SetEpsilon(float epsilon);
void SetElementwiseAffine(bool elementwiseAffine);
void SetBeginNormAxis(int axis);
void SetBeginParamsAxis(int axis);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
#else
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
std::vector<int> GetNormalizedShape() const;
float GetEpsilon() const;
bool GetElementwiseAffine() const;
std::vector<int> normlized_shape() const { return normlized_shape_; }
int elementwise_mode() const { return elementwise_mode_; }
int GetBeginNormAxis() const;
int GetBeginParamsAxis() const;
std::vector<int> GetNormlizedShape() const { return normlized_shape_; }

protected:
std::vector<int> normlized_shape_;
int elementwise_mode_ = 0;
};
} // namespace lite
} // namespace mindspore

@@ -14,9 +14,7 @@
* limitations under the License.
*/

#include "src/ops/populate/layer_norm_populate.h"
#include "nnacl/layer_norm_parameter.h"
#include <cstdint>
#include "src/ops/layer_norm.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"

@@ -32,20 +30,15 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi
memset(layer_norm_parameter, 0, sizeof(LayerNormParameter));
layer_norm_parameter->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::LayerNorm *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
auto normalized_shape = param->normlized_shape();
auto normalized_shape = param->GetNormlizedShape();
layer_norm_parameter->normalized_dims_ = normalized_shape.size();
if (normalized_shape.size() > SIZE_MAX / sizeof(int)) {
MS_LOG(ERROR) << "normalized_shape size too big";
free(layer_norm_parameter);
return nullptr;
}
MS_ASSERT(normalized_shape.size() < 8);
for (size_t i = 0; i < normalized_shape.size(); i++) {
layer_norm_parameter->normalized_shape_[i] = normalized_shape[i];
}
layer_norm_parameter->epsilon_ = param->GetEpsilon();
layer_norm_parameter->elementwise_mode_ = static_cast<ElementwiseMode>(param->elementwise_mode());

layer_norm_parameter->begin_norm_axis_ = param->GetBeginNormAxis();
layer_norm_parameter->begin_params_axis_ = param->GetBeginParamsAxis();
return reinterpret_cast<OpParameter *>(layer_norm_parameter);
}

@@ -0,0 +1,78 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/instance_norm_fp32.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/fp32/instance_norm_fp32.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_InstanceNorm;

namespace mindspore::kernel {
int InstanceNormCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
}
return ReSize();
}

int InstanceNormCPUKernel::ReSize() {
param_->op_parameter_.thread_num_ = context_->thread_num_;
auto shape = in_tensors_.front()->shape();
param_->batch_ = shape[0];
param_->inner_size_ = shape[2] * shape[3];
param_->channel_ = shape[1];
return RET_OK;
}

int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
int ret = InstanceNorm(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]";
return ret;
}
return RET_OK;
}

int InstanceNormRun(void *cdata, int task_id) {
auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
auto ret = kernel->DoInstanceNorm(task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
}

int InstanceNormCPUKernel::Run() {
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
return ret;
}
return RET_OK;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, LiteKernelCreator<InstanceNormCPUKernel>)
} // namespace mindspore::kernel

@@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/instance_norm_parameter.h"

using mindspore::lite::InnerContext;

namespace mindspore::kernel {
class InstanceNormCPUKernel : public LiteKernel {
public:
InstanceNormCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
}
~InstanceNormCPUKernel() override{};

int Init() override;
int ReSize() override;
int Run() override;
int DoInstanceNorm(int task_id);

private:
InstanceNormParameter *param_ = nullptr;
float *src_data_ = nullptr;
float *dst_data_ = nullptr;
float *gamma_data_ = nullptr;
float *beta_data_ = nullptr;
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_

@@ -18,7 +18,6 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/ops/populate/layer_norm_populate.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;

@@ -35,29 +34,37 @@ int LayerNormCPUKernel::Init() {
}

int LayerNormCPUKernel::ReSize() {
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
}
op_parameter_ = PopulateLayerNormParameter(primitive_);
op_parameter_->thread_num_ = context_->thread_num_;
param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_);
param_->begin_norm_axis_ = -1;
param_->begin_params_axis_ = -1;

auto shape = in_tensors_.front()->shape();
outer_size_ = 1;
inner_size_ = 1;
for (size_t i = 0; i < shape.size(); ++i) {
if (i + param_->normalized_dims_ < shape.size()) {
outer_size_ *= shape.at(i);
} else {
inner_size_ *= shape.at(i);
}
param_->begin_norm_axis_ =
param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
param_->begin_params_axis_ =
param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();

param_->norm_outer_size_ = 1;
for (int i = 0; i < param_->begin_norm_axis_; ++i) {
param_->norm_outer_size_ *= shape.at(i);
}
param_->norm_inner_size_ = 1;
for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
param_->norm_inner_size_ *= shape.at(i);
}
param_->params_outer_size_ = 1;
for (int i = 0; i < param_->begin_params_axis_; ++i) {
param_->params_outer_size_ *= shape.at(i);
}
param_->params_inner_size_ = 1;
for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
param_->params_inner_size_ *= shape.at(i);
}
param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_);
return RET_OK;
}

int LayerNormCPUKernel::DoLayerNorm(int thread_id) {
int ret = LayerNorm(outer_size_, inner_size_, src_data_, gamma_data_, beta_data_, param_->elementwise_mode_,
param_->epsilon_, dst_data_, thread_id, op_parameter_->thread_num_);
int ret = LayerNorm(src_data_, gamma_data_, beta_data_, dst_data_, param_, thread_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoLayerNorm error error_code[" << ret << "]";
return ret;

@@ -77,10 +84,8 @@ int LayerNormRun(void *cdata, int task_id) {

int LayerNormCPUKernel::Run() {
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
if (param_->elementwise_mode_ != 0) {
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
}
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {

@@ -40,8 +40,6 @@ class LayerNormCPUKernel : public LiteKernel {

private:
LayerNormParameter *param_ = nullptr;
int outer_size_;
int inner_size_;
float *src_data_ = nullptr;
float *dst_data_ = nullptr;
float *gamma_data_ = nullptr;

@@ -15,7 +15,6 @@
*/
#include "src/runtime/kernel/arm/int8/layer_norm_int8.h"
#include "src/runtime/runtime_api.h"
#include "src/ops/populate/layer_norm_populate.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;

@@ -25,15 +24,14 @@ using mindspore::schema::PrimitiveType_LayerNorm;

namespace mindspore::kernel {
LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() {
if (param_->elementwise_mode_ != 0 && gamma_ptr_ != nullptr) {
if (gamma_ptr_ != nullptr) {
free(gamma_ptr_);
gamma_ptr_ = nullptr;
}
if (param_->elementwise_mode_ != 0 && beta_ptr_ != nullptr) {
if (beta_ptr_ != nullptr) {
free(beta_ptr_);
beta_ptr_ = nullptr;
}
return;
}

int LayerNormInt8CPUKernel::SetQuantArgs() {

@@ -45,33 +43,31 @@ int LayerNormInt8CPUKernel::SetQuantArgs() {
quant_param_.out_zp_ = output->quant_params().front().zeroPoint;
quant_param_.out_scale_ = output->quant_params().front().scale;

if (param_->elementwise_mode_ != 0) {
lite::Tensor *gamma_tensor = in_tensors_.at(1);
lite::Tensor *beta_tensor = in_tensors_.at(2);
lite::Tensor *gamma_tensor = in_tensors_.at(1);
lite::Tensor *beta_tensor = in_tensors_.at(2);

double gamma_scale = gamma_tensor->quant_params().front().scale;
int gamma_zp = gamma_tensor->quant_params().front().zeroPoint;
gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float)));
if (gamma_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc gamma_ptr_ failed";
return RET_ERROR;
}
int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c());
for (int i = 0; i < gamma_tensor->ElementsNum(); i++) {
gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale;
}
double gamma_scale = gamma_tensor->quant_params().front().scale;
int gamma_zp = gamma_tensor->quant_params().front().zeroPoint;
gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float)));
if (gamma_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc gamma_ptr_ failed";
return RET_ERROR;
}
int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c());
for (int i = 0; i < gamma_tensor->ElementsNum(); i++) {
gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale;
}

beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float)));
if (beta_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc beta_ptr_ failed";
free(gamma_ptr_);
gamma_ptr_ = nullptr;
return RET_ERROR;
}
int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c());
for (int i = 0; i < beta_tensor->ElementsNum(); i++) {
beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale;
}
beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float)));
if (beta_ptr_ == nullptr) {
MS_LOG(ERROR) << "malloc beta_ptr_ failed";
free(gamma_ptr_);
gamma_ptr_ = nullptr;
return RET_ERROR;
}
int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c());
for (int i = 0; i < beta_tensor->ElementsNum(); i++) {
beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale;
}
return RET_OK;
}

@@ -86,30 +82,37 @@ int LayerNormInt8CPUKernel::Init() {
}

int LayerNormInt8CPUKernel::ReSize() {
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
}
op_parameter_ = PopulateLayerNormParameter(primitive_);
if (op_parameter_ == nullptr) {
MS_LOG(ERROR) << "op_parameter_ is nullptr!";
return RET_NULL_PTR;
}
op_parameter_->thread_num_ = context_->thread_num_;
param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_);
auto shape = in_tensors_.front()->shape();
outer_size_ = 1;
inner_size_ = 1;
for (size_t i = 0; i < shape.size(); ++i) {
if (i + param_->normalized_dims_ < shape.size()) {
outer_size_ *= shape.at(i);
} else {
inner_size_ *= shape.at(i);
}
}
param_->begin_norm_axis_ = -1;
param_->begin_params_axis_ = -1;

param_->thread_count_ = MSMIN(outer_size_, op_parameter_->thread_num_);
param_->thread_outsize_ = UP_DIV(outer_size_, param_->thread_count_);
auto shape = in_tensors_.front()->shape();
param_->begin_norm_axis_ =
param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size();
param_->begin_params_axis_ =
param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size();

param_->norm_outer_size_ = 1;
for (int i = 0; i < param_->begin_norm_axis_; ++i) {
param_->norm_outer_size_ *= shape.at(i);
}
param_->norm_inner_size_ = 1;
for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) {
param_->norm_inner_size_ *= shape.at(i);
}
param_->params_outer_size_ = 1;
for (int i = 0; i < param_->begin_params_axis_; ++i) {
param_->params_outer_size_ *= shape.at(i);
}
param_->params_inner_size_ = 1;
for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) {
param_->params_inner_size_ *= shape.at(i);
}
param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_);
return RET_OK;
}

int LayerNormInt8CPUKernel::DoExecute(int task_id) {
LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, &quant_param_, task_id);
return RET_OK;
}

@@ -119,21 +122,6 @@ int LayerNormInt8Run(void *cdata, int task_id) {
return RET_OK;
}

int LayerNormInt8CPUKernel::DoExecute(int task_id) {
int current_out_size = outer_size_ - task_id * param_->thread_outsize_;
current_out_size = MSMIN(current_out_size, param_->thread_outsize_);
if (current_out_size <= 0) {
return RET_OK;
}

const int8_t *thread_src = src_ptr_ + task_id * param_->thread_outsize_ * inner_size_;
int8_t *thread_dst = dst_ptr_ + task_id * param_->thread_outsize_ * inner_size_;

LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_mode_, current_out_size, inner_size_,
&quant_param_, param_->epsilon_);
return RET_OK;
}

int LayerNormInt8CPUKernel::Run() {
src_ptr_ = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData());
dst_ptr_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());

@@ -46,8 +46,6 @@ class LayerNormInt8CPUKernel : public LiteKernel {
private:
LayerNormParameter *param_ = nullptr;
LayerNormQuantArg quant_param_;
int outer_size_ = 0;
int inner_size_ = 0;
int8_t *src_ptr_ = nullptr;
int8_t *dst_ptr_ = nullptr;
float *gamma_ptr_ = nullptr;

@@ -349,7 +349,7 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) {
}
break;
}
case schema::ResizeMethod_UNKNOW:
case schema::ResizeMethod_UNKNOWN:
default: {
MS_LOG(ERROR) << "Resize unknown method " << method_;
ret = RET_ERROR;

@@ -33,24 +33,6 @@ namespace mindspore::kernel {

int LayerNormOpenCLKernel::CheckSpecs() {
auto param = reinterpret_cast<LayerNormParameter *>(this->op_parameter_);
if (param->elementwise_mode_ == ELEMENTWISE_PER_NUM) {
if (in_tensors_.size() != 3) {
MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl;
return RET_ERROR;
}
if (param->normalized_dims_ > in_tensors_.at(0)->shape().size()) {
MS_LOG(ERROR) << " invalid normalized_shape_ size" << param->normalized_dims_ << std::endl;
return RET_ERROR;
}
} else if (param->elementwise_mode_ == ELEMENTWISE_NOT) {
if (in_tensors_.size() != 1) {
MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl;
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "Unsupported elementwise_mode_" << param->elementwise_mode_;
return RET_ERROR;
}
if (in_tensors_.at(0)->shape().size() != 4 || out_tensors_.size() != 1) {
MS_LOG(ERROR) << "UnSupported in_tensors_.shape.size: " << in_tensors_.at(0)->shape().size()
<< " out_tensors_.size(): " << out_tensors_.size();

@@ -24,7 +24,6 @@ namespace {
// PrimitiveType_Stack: src/ops/populate/stack_populate.cc
OpParameter *CreateParameter(float epsilon, int normalized_dims_, std::vector<int> normalizedShape) {
auto *param = test::CreateParameter<LayerNormParameter>(schema::PrimitiveType_LayerNorm);
param->elementwise_mode_ = ELEMENTWISE_PER_NUM;
param->epsilon_ = epsilon;
param->normalized_dims_ = normalized_dims_;
for (int i = 0; i < normalizedShape.size() && i < normalized_dims_; ++i) {

@@ -48,7 +48,6 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = {
schema::PrimitiveType_FusedBatchNorm,
schema::PrimitiveType_PReLU,
schema::PrimitiveType_BiasAdd,
schema::PrimitiveType_InstanceNorm,
schema::PrimitiveType_SpaceToDepth,
schema::PrimitiveType_DepthToSpace,
schema::PrimitiveType_TopK};

@@ -22,7 +22,7 @@ namespace lite {
lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphProto &onnx_graph,
const onnx::NodeProto &onnx_node) {
MS_LOG(DEBUG) << "onnx InstanceNormParser";
auto attr = std::make_unique<schema::LayerNormT>();
auto attr = std::make_unique<schema::InstanceNormT>();
if (attr == nullptr) {
MS_LOG(ERROR) << "new op failed";
return nullptr;

@@ -39,8 +39,7 @@ lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphPr
MS_LOG(ERROR) << "new primitive failed";
return nullptr;
}
attr->elementwiseAffine = true;
primitive->value.type = schema::PrimitiveType_LayerNorm;
primitive->value.type = schema::PrimitiveType_InstanceNorm;
primitive->value.value = attr.release();
return PrimitiveC::Create(primitive.release());
}

@@ -57,7 +57,7 @@ STATUS TFResizeParser::Parse(const tensorflow::NodeDef &tf_op,
} else if (tf_op.op() == "ResizeNearestNeighbor") {
attr->method = schema::ResizeMethod_NEAREST;
} else {
attr->method = schema::ResizeMethod_UNKNOW;
attr->method = schema::ResizeMethod_UNKNOWN;
}
auto size_node = tf_node_map.at(tf_op.input(1));
if (size_node == nullptr) {

@@ -114,9 +114,7 @@ CNodePtr LayerNormFusion::CreateLayerNormNode(const FuncGraphPtr &func_graph, co
MS_EXCEPTION_IF_NULL(func_graph);
auto layer_norm_primitive = std::make_unique<schema::PrimitiveT>();
std::unique_ptr<schema::LayerNormT> attr = std::make_unique<schema::LayerNormT>();
attr->normalizedShape = shape;
attr->epsilon = epsilon;
attr->elementwiseAffine = true;
layer_norm_primitive->value.type = schema::PrimitiveType_LayerNorm;
layer_norm_primitive->value.value = attr.release();
auto layer_norm_cvalue = lite::PrimitiveC::Create(layer_norm_primitive.release());