!14376 [MSLITE][DEVELOP] clear static check warnings in module of lite runtime op
From: @yangruoqi713
Reviewed-by: @zhang_xue_tong, @zhanghaibo5, @hangangqiang
Signed-off-by: @zhanghaibo5

commit 5573f1609a
@@ -195,7 +195,7 @@ int GeluFp16(const float16_t *src, int length, float16_t *dst, bool approximate)
   int C8 = UP_ROUND(length, C8NUM);
   for (; i < C8; i += C8NUM) {
     float16x8_t in = vld1q_f16(src + i);
-    float16x8_t res = 0.5 * in * (1.0 + MS_ERFX8_F16(in / (float16_t)1.4142135623730951f));
+    const float16x8_t res = 0.5 * in * (1.0 + MS_ERFX8_F16(in / (float16_t)1.4142135623730951f));
     vst1q_f16(dst + i, res);
   }
 #endif
@@ -59,7 +59,7 @@ static inline void single_exp_fp16(float16_t src, float16_t *dst) {
   int integer = (float)src / param[0];
   float decimal = (float)src - integer * param[0];
   int int_exp = (integer + 127) << 23;
-  float decimal_exp =
+  const float decimal_exp =
     1.0f + decimal * (1.0f + decimal * (0.5f + decimal * (param[3] + decimal * (param[2] + decimal * param[1]))));
   *dst = (float16_t)(*((float *)&int_exp) * decimal_exp);
 }
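Note on the code this hunk touches: single_exp_fp16 evaluates exp() with the classic exponent-bit trick. It splits x into an integer multiple of ln(2) plus a remainder, synthesizes 2^integer by writing (integer + 127) straight into the IEEE-754 exponent field, then corrects with a short polynomial in the remainder. A standalone sketch of the same idea in plain float (the concrete param[] coefficient values are assumptions; the kernel's float16 plumbing is omitted):

    #include <math.h>
    #include <stdio.h>
    #include <string.h>

    static float exp_approx(float x) {
      const float ln2 = 0.69314718f; /* assumed value of param[0] */
      int integer = (int)(x / ln2);
      float decimal = x - integer * ln2;
      int int_exp = (integer + 127) << 23; /* bit pattern of 2^integer */
      float pow2;
      memcpy(&pow2, &int_exp, sizeof(pow2)); /* memcpy sidesteps the aliasing cast */
      /* Taylor terms of exp(decimal); stand-ins for param[1..3] */
      float decimal_exp =
        1.0f + decimal * (1.0f + decimal * (0.5f + decimal * (1.0f / 6 + decimal * (1.0f / 24 + decimal / 120))));
      return pow2 * decimal_exp;
    }

    int main(void) {
      printf("approx %f vs libm %f\n", exp_approx(1.5f), expf(1.5f)); /* ~4.4817 both */
      return 0;
    }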
@@ -339,7 +339,7 @@ bool CheckConvDw1DWinograd(const ConvParameter *conv_param, int thread_num) {
          conv_param->stride_h_ == 1 && conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1 &&
          conv_param->pad_u_ == 1 && conv_param->pad_d_ == 1 && conv_param->pad_l_ == 1 && conv_param->pad_r_ == 1 &&
          conv_param->input_channel_ == conv_param->output_channel_ &&
-         conv_param->output_h_ / thread_num >= 4;  // better had more than 4 rows for each thread
+         conv_param->output_h_ >= thread_num * 4;  // better had more than 4 rows for each thread
 }
 
 void ConvDw3x3RowLeft(const float *src, float *line, int lw, int channel) {
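For positive thread counts the rewritten predicate is exactly equivalent: with nonnegative integers, output_h_ / thread_num >= 4 (truncating division) holds precisely when output_h_ >= thread_num * 4, and the multiply form removes the division a checker can flag as a potential divide-by-zero. A brute-force check of that equivalence (assuming thread_num > 0, as the kernel does):

    #include <assert.h>

    int main(void) {
      for (int output_h = 0; output_h < 1000; ++output_h) {
        for (int thread_num = 1; thread_num < 64; ++thread_num) {
          /* truncating division vs multiplication: same predicate */
          assert((output_h / thread_num >= 4) == (output_h >= thread_num * 4));
        }
      }
      return 0;
    }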
@@ -20,7 +20,7 @@
 void LayerNormGrad(const float *x, const float *dy, const float *var, const float *mean, const float *gamma,
                    int param_num, int param_size, int block_num, int block_size, float *dx, float *dg, float *db) {
   // var is actually layer_norm forward output var
-  float eps = 1e-12;
+  const float eps = 1e-12;
   const float *var_sqrt_rev = var;
   for (size_t i = 0; i < param_num; ++i) {
     float dgamma = 0.0f;
@@ -37,23 +37,23 @@ void LayerNormGrad(const float *x, const float *dy, const float *var, const floa
     float sum1 = 0.0f;
     float sum2 = 0.0f;
     float sum3 = 0.0f;
-    for (size_t j = i * block_size; j < (i + 1) * block_size; ++j) {
-      int param_shift = j % param_num;
-      int norm_shift = (int)(j / block_size);
-      float dxm = x[j] - mean[norm_shift];
-      float dyg = dy[j] * gamma[param_shift];
-      sum1 += -0.5f * dyg * dxm * pow(var_sqrt_rev[norm_shift] + eps, -1.5);
+    for (size_t j = 0; j < block_size; ++j) {
+      int index = i * block_size + j;
+      float dxm = x[index] - mean[i];
+      int param_shift = index % param_num;
+      float dyg = dy[index] * gamma[param_shift];
+      sum1 += -0.5f * dyg * dxm * pow(var_sqrt_rev[i] + eps, -1.5);
       sum2 += dyg;
       sum3 += -2.0f * dxm;
     }
-    for (size_t j = i * block_size; j < (i + 1) * block_size; ++j) {
-      int param_shift = j % param_num;
-      int norm_shift = (int)(j / block_size);
-      float var_sqrt = pow(var_sqrt_rev[norm_shift] + eps, -0.5);
-      float dx1 = dy[j] * gamma[param_shift] * var_sqrt;
-      float dx2 = sum1 * 2.0f / block_size * (x[j] - mean[norm_shift]);
+    for (size_t j = 0; j < block_size; ++j) {
+      int index = i * block_size + j;
+      float var_sqrt = pow(var_sqrt_rev[i] + eps, -0.5);
+      int param_shift = index % param_num;
+      float dx1 = dy[index] * gamma[param_shift] * var_sqrt;
+      float dx2 = sum1 * 2.0f / block_size * (x[index] - mean[i]);
       float dx3 = (-1.0f * var_sqrt * sum2 + (1.0f / block_size) * sum1 * sum3) * (1.0f / block_size);
-      dx[j] = dx1 + dx2 + dx3;
+      dx[index] = dx1 + dx2 + dx3;
     }
   }
 }
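The LayerNormGrad rewrite is index-only and behavior-preserving: the old loops ran j over [i * block_size, (i + 1) * block_size) and recomputed norm_shift = j / block_size on every pass, which is always i; the new loops run j over [0, block_size) and build the flat offset once, so mean[i] and var_sqrt_rev[i] replace the recomputed lookups. A quick self-check of that identity:

    #include <assert.h>

    int main(void) {
      const int block_num = 3, block_size = 4;
      for (int i = 0; i < block_num; ++i) {
        for (int j = 0; j < block_size; ++j) {
          int index = i * block_size + j;      /* new flat offset */
          int norm_shift = index / block_size; /* what the old code derived from j */
          assert(norm_shift == i);             /* hence mean[norm_shift] == mean[i] */
        }
      }
      return 0;
    }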
@@ -144,8 +144,7 @@ void MaxPoolingGrad(const float *input_ptr, const float *dy_ptr, float *output_p
           int xw = yw * stride_w + kw - pad_w;
           int val_idx = (xw + in_w * xh) * channel + ic;
 #ifdef ENABLE_ARM
-          unsigned int val_idx_vec[] = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3};
-          uint32x4_t index = vld1q_u32(val_idx_vec);
+          uint32x4_t index = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3};
           float32x4_t in = vld1q_f32(inPtr + val_idx);
           max_idx = MaxIndex(in, &max_val, index, max_idx);
 #else
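This change folds the temporary array plus vld1q_u32 round trip into a direct vector initializer; GCC and Clang both accept brace initialization of NEON vector types, so no load from memory is needed. A minimal ARM-only sketch (compile with NEON enabled):

    #include <arm_neon.h>

    uint32x4_t make_index(unsigned int val_idx) {
      /* Brace init of a NEON vector type is a GCC/Clang extension; the
       * lanes match what vld1q_u32(val_idx_vec) used to produce. */
      uint32x4_t index = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3};
      return index;
    }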
@@ -52,17 +52,17 @@ void ResizeBiLinearGrad(float *in_addr, float *out_addr, int batch_size, int cha
     size_t h = i / param->in_width_;
     size_t w = i % param->in_width_;
     for (int32_t c = 0; c < channel; ++c) {
-      float in_y = (float)h * param->height_scale_;
+      const float in_y = (float)h * param->height_scale_;
       size_t top_y_index = MSMAX((size_t)(floorf(in_y)), (size_t)(0));
       size_t bottom_y_index = MSMIN((size_t)(ceilf(in_y)), param->out_height_ - 1);
-      float y_lerp = in_y - floorf(in_y);
-      float inverse_y_lerp = 1.0 - y_lerp;
+      const float y_lerp = in_y - floorf(in_y);
+      const float inverse_y_lerp = 1.0 - y_lerp;
 
-      float in_x = (float)w * param->width_scale_;
+      const float in_x = (float)w * param->width_scale_;
       size_t left_x_index = MSMAX((size_t)(floorf(in_x)), (size_t)(0));
       size_t right_x_index = MSMIN((size_t)(ceilf(in_x)), param->out_width_ - 1);
-      float x_lerp = in_x - floorf(in_x);
-      float inverse_x_lerp = 1.0 - x_lerp;
+      const float x_lerp = in_x - floorf(in_x);
+      const float inverse_x_lerp = 1.0 - x_lerp;
 
       size_t in_offset = h * (param->in_width_ * channel) + (w * channel) + c;
       size_t out_offset_top_y_left_x = top_y_index * (param->out_width_ * channel) + (left_x_index * channel) + c;
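As context for the const sweep: each lerp/inverse-lerp pair above is a 1-D bilinear weight. in_y maps an input row back into the output grid, floorf/ceilf pick the two bracketing rows, and y_lerp splits the gradient's weight between them (likewise for columns). A standalone sketch of the weight computation with made-up numbers (the actual gradient scatter to the four corners lives outside this hunk):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      float height_scale = 0.5f;                   /* hypothetical scale factor */
      size_t h = 3;                                /* input row being processed */
      float in_y = (float)h * height_scale;        /* 1.5: between rows 1 and 2 */
      size_t top_y_index = (size_t)floorf(in_y);   /* 1 */
      size_t bottom_y_index = (size_t)ceilf(in_y); /* 2 */
      float y_lerp = in_y - floorf(in_y);          /* 0.5: weight of the bottom row */
      float inverse_y_lerp = 1.0f - y_lerp;        /* 0.5: weight of the top row */
      printf("rows %zu/%zu get weights %.2f/%.2f\n", top_y_index, bottom_y_index,
             inverse_y_lerp, y_lerp);
      return 0;
    }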
@@ -18,6 +18,9 @@
 
 int UnsortedSegmentSum(const float *input, int unit_num, int input_dim1, const int *indices, float *output,
                        int output_dim0, int output_dim1) {
+  if (input_dim1 == 0) {
+    return NNACL_ERR;
+  }
   for (int i = 0; i < unit_num; ++i) {
     int j = i / input_dim1;
     int k = i % input_dim1;
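The new early return is not cosmetic: the loop body computes i / input_dim1 and i % input_dim1, and integer division or modulo by zero is undefined behavior in C, which is exactly what a static checker flags here. A minimal sketch of the same guard pattern (the NNACL_ERR/NNACL_OK values below are stand-ins):

    #include <stdio.h>

    #define NNACL_ERR (-1) /* stand-in for the real nnacl error codes */
    #define NNACL_OK 0

    static int row_col_split(int i, int input_dim1, int *row, int *col) {
      if (input_dim1 == 0) {
        return NNACL_ERR; /* reject before the division below can trap */
      }
      *row = i / input_dim1;
      *col = i % input_dim1;
      return NNACL_OK;
    }

    int main(void) {
      int r, c;
      if (row_col_split(7, 3, &r, &c) == NNACL_OK) {
        printf("row %d col %d\n", r, c); /* row 2 col 1 */
      }
      return 0;
    }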
@@ -368,6 +368,9 @@ int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou
 }
 
 int VectorCInit(VectorC *vc, size_t per_malloc_size) {
+  if (per_malloc_size == 0) {
+    return NNACL_ERR;
+  }
   vc->data_ = (int *)malloc(per_malloc_size * sizeof(int));
   if (vc->data_ == NULL) {
     return NNACL_ERR;
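Rejecting per_malloc_size == 0 also keeps the NULL check below it meaningful: malloc(0) may legally return either NULL or a unique non-NULL pointer, so a zero-sized request would make the error path implementation-defined. A small demonstration:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
      void *p = malloc(0); /* implementation-defined: NULL or a unique pointer */
      printf("malloc(0) returned %p\n", p);
      free(p); /* free(NULL) is a no-op, so this is safe either way */
      return 0;
    }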
@@ -63,10 +63,10 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   size_t min_sizes_size = param->min_sizes_size;
   size_t max_sizes_size = param->max_sizes_size;
   int32_t num_priors_box = min_sizes_size * different_aspect_ratios_size + max_sizes_size;
-  int kPriorBoxPoints = 4;
-  int kPriorBoxN = 1;
-  int kPriorBoxW = 1;
-  int kPriorBoxC = 2;
+  const int kPriorBoxPoints = 4;
+  const int kPriorBoxN = 1;
+  const int kPriorBoxW = 1;
+  const int kPriorBoxC = 2;
 
   int32_t h = GetHeight(input) * GetWidth(input) * num_priors_box * kPriorBoxPoints;
   output->shape_size_ = 4;
@@ -68,7 +68,7 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   }
   bool keep_dims = param->keep_dims_;
   int out_shape[MAX_SHAPE_SIZE];
-  size_t out_shape_size = 0;
+  const size_t out_shape_size = 0;
   // get axes from input tensor
   const TensorC *axes_input = inputs[1];
   if (axes_input->shape_size_ == 1 && axes_input->shape_[0] == 0) {
@@ -75,6 +75,9 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size)
 
 int CalShapeByType(const TensorC *const *inputs, size_t shape_size, int *out_shape, size_t *out_shape_size) {
   const TensorC *shape_tensor = inputs[1];
+  if (shape_size == 0) {
+    return NNACL_ERR;
+  }
   switch (shape_tensor->data_type_) {
     case kNumberTypeInt8: {
       int8_t *data = (int8_t *)(shape_tensor->data_);
@@ -59,8 +59,7 @@ int HandleAxesCheckNull(const TensorC *input_tensor, const TensorC *begin_tensor
   return NNACL_OK;
 }
 
-int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape_, int *begins_, int *strides_,
-                         int *ends_) {
+int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape, int *begins, int *strides, int *ends) {
   const TensorC *input_tensor = inputs[0];
   const TensorC *begin_tensor = inputs[1];
   int *begin_data = (int *)(begin_tensor->data_);
@@ -73,7 +72,7 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape
   }
 
   // when input contains axes, begins, ends, strides will be expand to the same length as input rank
-  *ndim_ = (int)(input_tensor->shape_size_);
+  *ndim = (int)(input_tensor->shape_size_);
   int begin_ndim = GetElementNum(begin_tensor);
 
   int *axes_data = NULL;
@@ -111,20 +110,20 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape
     }
     for (int i = 0; i < begin_ndim; ++i) {
      if (axes[i] < 0) {
-        axes[i] += *ndim_;
+        axes[i] += *ndim;
       }
     }
   }
 
-  for (size_t i = 0; i < *ndim_; i++) {
-    in_shape_[i] = 0;
-    begins_[i] = 0;
-    strides_[i] = 0;
+  for (size_t i = 0; i < *ndim; i++) {
+    in_shape[i] = 0;
+    begins[i] = 0;
+    strides[i] = 0;
   }
-  for (size_t i = 0; i < *ndim_; ++i) {
-    in_shape_[i] = input_tensor->shape_[i];
+  for (size_t i = 0; i < *ndim; ++i) {
+    in_shape[i] = input_tensor->shape_[i];
   }
-  for (size_t i = 0; i < *ndim_; ++i) {
+  for (size_t i = 0; i < *ndim; ++i) {
     int axes_it = 0;
     for (size_t j = 0; j < begin_ndim; j++) {
       if (axes[j] == i) {
@@ -137,13 +136,13 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim_, int *in_shape
     if (axes_it != begin_ndim) {
       int axis = axes_it;
       // begins or ends exceed limit will be set to limit
-      begins_[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]);
-      ends_[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1);
-      strides_[i] = stride_data[axis];
+      begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]);
+      ends[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1);
+      strides[i] = stride_data[axis];
     } else {
-      begins_[i] = 0;
-      ends_[i] = input_tensor->shape_[i];
-      strides_[i] = 1;
+      begins[i] = 0;
+      ends[i] = input_tensor->shape_[i];
+      strides[i] = 1;
     }
   }
   return NNACL_OK;
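The renames in HandleAxesInputExist are a style fix: in the Google-derived convention this codebase appears to follow, a trailing underscore marks a struct or class member, so parameters named ndim_ or begins_ read like member accesses and trip the checker. A hypothetical illustration of the rule (the size_ field is invented; only data_ appears in this diff):

    #include <stddef.h>

    typedef struct VectorC {
      int *data_;   /* members carry the trailing underscore */
      size_t size_; /* hypothetical field, for illustration only */
    } VectorC;

    /* parameters and locals do not */
    static void VectorCAssign(VectorC *vc, int *data, size_t size) {
      vc->data_ = data;
      vc->size_ = size;
    }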
@@ -174,18 +173,18 @@ void Bit2Vector(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParamet
   }
 }
 
-void ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, int *in_shape_,
-                      size_t *in_shape_size) {
+void ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, int *in_shape,
+                      size_t *out_shape_size) {
   for (size_t i = 0; i < transfer_buffer->new_axis_mask_size_; i++) {
     if (transfer_buffer->new_axis_mask_[i]) {
       transfer_buffer->ndim_ += 1;
-      ShapeInsert(in_shape_, in_shape_size, i, 1);
+      ShapeInsert(in_shape, out_shape_size, i, 1);
       transfer_buffer->begins_[i] = 0;
       transfer_buffer->ends_[i] = 1;
       transfer_buffer->strides_[i] = 1;
 
       ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, 0);
-      ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape_[transfer_buffer->ndim_ - 1]);
+      ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[transfer_buffer->ndim_ - 1]);
       ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, 1);
 
       transfer_buffer->begins_mask_[i] = false;
@@ -204,33 +203,45 @@ void ApplyBeginMask(StridedSliceTransferBuffer *transfer_buffer) {
   }
 }
 
-void ApplyEndMask(StridedSliceTransferBuffer *transfer_buffer, int *in_shape_) {
+int ApplyEndMask(StridedSliceTransferBuffer *transfer_buffer, const int *in_shape, size_t in_shape_size) {
   for (int i = 0; i < transfer_buffer->ndim_; i++) {
     if (transfer_buffer->ends_mask_[i]) {
-      transfer_buffer->ends_[i] = in_shape_[i];
+      if (i >= in_shape_size) {
+        return NNACL_ERR;
+      }
+      transfer_buffer->ends_[i] = in_shape[i];
     }
   }
+  return NNACL_OK;
 }
 
-void ApplyEllipsisMask(StridedSliceTransferBuffer *transfer_buffer, int *in_shape_) {
+int ApplyEllipsisMask(StridedSliceTransferBuffer *transfer_buffer, const int *in_shape, size_t in_shape_size) {
   for (size_t i = 0; i < transfer_buffer->ellipsis_mask_size_; i++) {
     if (transfer_buffer->ellipsis_mask_[i]) {
+      if (i >= in_shape_size) {
+        return NNACL_ERR;
+      }
       transfer_buffer->begins_[i] = 0;
-      transfer_buffer->ends_[i] = in_shape_[i];
+      transfer_buffer->ends_[i] = in_shape[i];
       break;
     }
   }
+  return NNACL_OK;
 }
 
-void TransIndexToPositive(StridedSliceTransferBuffer *transfer_buffer, int *in_shape_) {
-  for (int i = 0; i < (int)(transfer_buffer->begins_size_); ++i) {
+int TransIndexToPositive(StridedSliceTransferBuffer *transfer_buffer, const int *in_shape, size_t in_shape_size) {
+  for (size_t i = 0; i < transfer_buffer->begins_size_; i++) {
+    if (i >= in_shape_size) {
+      return NNACL_ERR;
+    }
     if (transfer_buffer->begins_[i] < 0) {
-      transfer_buffer->begins_[i] += in_shape_[i];
+      transfer_buffer->begins_[i] += in_shape[i];
     }
     if (transfer_buffer->ends_[i] < 0) {
-      transfer_buffer->ends_[i] += in_shape_[i];
+      transfer_buffer->ends_[i] += in_shape[i];
     }
   }
+  return NNACL_OK;
 }
 
 void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_shape, size_t *output_shape_size) {
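To make the mask semantics concrete: an ends-mask bit widens that dimension's slice end to the full dimension extent, and an ellipsis-mask bit resets the first flagged dimension to the complete [0, dim) range. A toy run of the ends-mask rule, mirroring the logic above (the mask values are made up):

    #include <stdio.h>

    int main(void) {
      int in_shape[2] = {5, 6};
      int begins[2] = {1, 2}, ends[2] = {3, 4};
      int ends_mask[2] = {1, 0}; /* hypothetical mask bits */
      for (int i = 0; i < 2; i++) {
        if (ends_mask[i]) {
          ends[i] = in_shape[i]; /* same rule as ApplyEndMask */
        }
      }
      printf("dim0: [%d, %d)  dim1: [%d, %d)\n", begins[0], ends[0], begins[1], ends[1]);
      return 0;
    }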
@@ -251,20 +262,25 @@ void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_sh
   }
 }
 
-void TransferBuffer2Param(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, int *in_shape_) {
+int TransferBuffer2Param(StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, const int *in_shape,
+                         size_t in_shape_size) {
+  if (transfer_buffer->ndim_ >= in_shape_size || param->in_shape_length_ >= in_shape_size) {
+    return NNACL_ERR;
+  }
   for (int i = 0; i < transfer_buffer->ndim_; i++) {
     param->begins_[i] = transfer_buffer->begins_[i];
     param->ends_[i] = transfer_buffer->ends_[i];
-    param->in_shape_[i] = in_shape_[i];
+    param->in_shape_[i] = in_shape[i];
     param->strides_[i] = transfer_buffer->strides_[i];
   }
 
   for (int i = transfer_buffer->ndim_; i < param->in_shape_length_; i++) {
     param->begins_[i] = 0;
-    param->ends_[i] = in_shape_[i];
-    param->in_shape_[i] = in_shape_[i];
+    param->ends_[i] = in_shape[i];
+    param->in_shape_[i] = in_shape[i];
     param->strides_[i] = 1;
   }
+  return NNACL_OK;
 }
 
 void InitStridedSliceTransferBuffer(StridedSliceTransferBuffer *transfer_buffer) {
@@ -302,9 +318,9 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
     return NNACL_INFER_INVALID;
   }
 
-  int in_shape_[MAX_SHAPE_SIZE];
+  int in_shape[MAX_SHAPE_SIZE];
   size_t in_shape_size = 0;
-  ShapeSet(in_shape_, &in_shape_size, input->shape_, input->shape_size_);
+  ShapeSet(in_shape, &in_shape_size, input->shape_, input->shape_size_);
 
   StridedSliceTransferBuffer transfer_buffer;
   InitStridedSliceTransferBuffer(&transfer_buffer);
@@ -345,7 +361,7 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
   }
 
   if (inputs_size == 5) {
-    int ret = HandleAxesInputExist(inputs, &transfer_buffer.ndim_, in_shape_, transfer_buffer.begins_,
+    int ret = HandleAxesInputExist(inputs, &transfer_buffer.ndim_, in_shape, transfer_buffer.begins_,
                                    transfer_buffer.strides_, transfer_buffer.ends_);
     if (ret != NNACL_OK) {
       return ret;
@@ -355,15 +371,24 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
   // set all mask to original input shape
   SetMaskSize(&transfer_buffer);
   Bit2Vector(&transfer_buffer, param);
-  ApplyNewAxisMask(&transfer_buffer, param, in_shape_, &in_shape_size);
+  ApplyNewAxisMask(&transfer_buffer, param, in_shape, &in_shape_size);
   ApplyBeginMask(&transfer_buffer);
-  ApplyEndMask(&transfer_buffer, in_shape_);
-  ApplyEllipsisMask(&transfer_buffer, in_shape_);
+  int ret = ApplyEndMask(&transfer_buffer, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
+  ret = ApplyEllipsisMask(&transfer_buffer, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
 
   int output_shape[MAX_SHAPE_SIZE];
   size_t output_shape_size = 0;
-  ShapeSet(output_shape, &output_shape_size, in_shape_, in_shape_size);
-  TransIndexToPositive(&transfer_buffer, in_shape_);
+  ShapeSet(output_shape, &output_shape_size, in_shape, in_shape_size);
+  ret = TransIndexToPositive(&transfer_buffer, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
   for (int i = 0; i < transfer_buffer.ndim_; i++) {
     if (transfer_buffer.strides_[i] == 0) {
       return NNACL_ERR;
@@ -374,8 +399,10 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
   }
   ApplyShrinkMask(&transfer_buffer, output_shape, &output_shape_size);
   SetShapeArray(outputs[0], output_shape, output_shape_size);
-  TransferBuffer2Param(&transfer_buffer, param, in_shape_);
+  ret = TransferBuffer2Param(&transfer_buffer, param, in_shape, MAX_SHAPE_SIZE);
+  if (ret != NNACL_OK) {
+    return ret;
+  }
   return NNACL_OK;
 }
@@ -31,6 +31,7 @@ OpParameter *PopulateSoftmaxParameter(const void *prim) {
   auto prim_softmax = primitive->value_as_Softmax();
   if (prim_softmax->axis()->size() != 1) {
     MS_LOG(ERROR) << "axis number invalid!number: " << prim_softmax->axis()->size();
+    free(softmax_param);
     return nullptr;
   }
   softmax_param->axis_ = prim_softmax->axis()->data()[0];
@@ -34,6 +34,7 @@ OpParameter *PopulateSplitParameter(const void *prim) {
   split_param->num_split_ = split_prim->numberSplit();
   if (split_param->num_split_ > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
     MS_LOG(ERROR) << "The value of split_param->num_split_ is too big";
+    free(split_param);
     return nullptr;
   }
   int *split_sizes = reinterpret_cast<int *>(malloc(split_param->num_split_ * sizeof(int)));
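Both populate fixes are the same leak pattern: the parameter struct is heap-allocated, and the early error return used to drop the only pointer to it. A C sketch of the shape of the fix (the allocation itself is outside these hunks, so the malloc below is an assumption about context):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct SoftmaxParameter {
      int axis_;
    } SoftmaxParameter;

    static SoftmaxParameter *PopulateSoftmax(int axis_count, int axis0) {
      SoftmaxParameter *softmax_param = (SoftmaxParameter *)malloc(sizeof(SoftmaxParameter));
      if (softmax_param == NULL) {
        return NULL;
      }
      memset(softmax_param, 0, sizeof(SoftmaxParameter));
      if (axis_count != 1) {
        free(softmax_param); /* the added line: release before the error return */
        return NULL;
      }
      softmax_param->axis_ = axis0;
      return softmax_param;
    }

    int main(void) {
      SoftmaxParameter *p = PopulateSoftmax(1, -1);
      if (p != NULL) {
        printf("axis = %d\n", p->axis_);
        free(p);
      }
      return 0;
    }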
@@ -51,7 +51,7 @@ inline ConvParameter *CreateNewConvParameter(ConvParameter *parameter) {
 
 inline void FreeMemory(ConvParameter *conv_param, const std::vector<lite::Tensor *> &new_inputs,
                        const std::vector<lite::Tensor *> &new_outputs) {
-  if (conv_param) {
+  if (conv_param != nullptr) {
     free(conv_param);
   }
   for (auto &in_tensor : new_inputs) {
@@ -146,7 +146,7 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() {
     }
   }
 
-  if (kernel) {
+  if (kernel != nullptr) {
     auto ret = kernel->Init();
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "conv kernel init failed.";
@@ -112,12 +112,6 @@ kernel::LiteKernel *NpuConvKernelCreator(const std::vector<lite::Tensor *> &inpu
                                          const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
-  if (inputs[0]->Size() > NPU_MEMORY_MAX) {
-    MS_LOG(ERROR) << "Npu does not support input tensor size greater than 200MB";
-    free(op_parameter);
-    return nullptr;
-  }
-
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   kernel::NPUKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
@@ -27,7 +27,6 @@ using mindspore::kernel::LiteKernel;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
-#define NPU_MEMORY_MAX 200 * 1024 * 1024
 class NPUKernel : public LiteKernel {
  public:
  NPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
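An aside on the macro deleted above: besides becoming dead once the 200MB input-size guards in the neighboring hunks are removed, an unparenthesized object-like macro is a standing static-check hazard, since it expands textually into larger expressions. A demonstration of the pitfall (general C behavior, not something the diff shows this code actually hit):

    #include <stdio.h>

    #define NPU_MEMORY_MAX 200 * 1024 * 1024

    int main(void) {
      long long size = 400LL * 1024 * 1024;
      printf("%lld\n", size / NPU_MEMORY_MAX);   /* expands to size / 200 * 1024 * 1024 */
      printf("%lld\n", size / (NPU_MEMORY_MAX)); /* parenthesized: prints 2 */
      return 0;
    }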
@@ -63,11 +62,6 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
     free(op_parameter);
     return nullptr;
   }
-  if (inputs[0]->Size() > NPU_MEMORY_MAX) {
-    MS_LOG(ERROR) << "Npu does not support input tensor size greater than 200MB";
-    free(op_parameter);
-    return nullptr;
-  }
   auto *kernel = new (std::nothrow) T(op_parameter, inputs, outputs, ctx);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel " << op_parameter->name_ << "is nullptr.";