!9127 [MSLITE][Develop] fix code review
From: @sunsuodong Reviewed-by: @zhang_xue_tong,@zhanghaibo5 Signed-off-by: @zhang_xue_tong
This commit is contained in:
commit
a1a2a957cd
|
@ -33,7 +33,7 @@ void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinMaxTopk1(const void *input, void *output, const int *shape, const ArgMinMaxParameter *param) {
|
||||
int pre_axis_count = 1;
|
||||
int axis_count = 1;
|
||||
int after_axis_count = 1;
|
||||
|
@ -48,7 +48,7 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->get_max_) {
|
||||
switch (param->axis_) {
|
||||
case 0:
|
||||
|
@ -82,7 +82,7 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->topk_ == 1) {
|
||||
ArgMinMaxTopk1(input, output, in_shape, param);
|
||||
return;
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMinMax(const void *input, void *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include "nnacl/arithmetic_common.h"
|
||||
#include "nnacl/nnacl_utils.h"
|
||||
|
||||
void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides,
|
||||
int *outStrides, int *multiple) {
|
||||
void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape,
|
||||
const int *inStrides, const int *outStrides, const int *multiple) {
|
||||
int srcDimSize = inShape[dim];
|
||||
if (dim == ndim - 1) {
|
||||
for (int i = 0; i < multiple[dim]; i++) {
|
||||
|
@ -35,8 +35,8 @@ void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *
|
|||
}
|
||||
}
|
||||
|
||||
void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides,
|
||||
int *outStrides, int *multiple) {
|
||||
void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape,
|
||||
const int *inStrides, const int *outStrides, const int *multiple) {
|
||||
int srcDimSize = inShape[dim];
|
||||
if (dim == ndim - 1) {
|
||||
for (int i = 0; i < multiple[dim]; i++) {
|
||||
|
@ -74,7 +74,8 @@ void CalcMultiplesAndStrides(ArithmeticParameter *param) {
|
|||
ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_);
|
||||
}
|
||||
|
||||
void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param) {
|
||||
void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
|
||||
ArithmeticParameter *param) {
|
||||
CalcMultiplesAndStrides(param);
|
||||
TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_,
|
||||
param->multiples0_);
|
||||
|
@ -82,7 +83,7 @@ void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_d
|
|||
param->multiples1_);
|
||||
}
|
||||
|
||||
void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
|
||||
void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
|
||||
ArithmeticParameter *param) {
|
||||
CalcMultiplesAndStrides(param);
|
||||
TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_,
|
||||
|
@ -91,7 +92,7 @@ void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, ui
|
|||
param->multiples1_);
|
||||
}
|
||||
|
||||
void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
|
||||
void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
|
||||
ArithmeticParameter *param) {
|
||||
CalcMultiplesAndStrides(param);
|
||||
TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_,
|
||||
|
|
|
@ -47,18 +47,19 @@ typedef struct ArithmeticParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides,
|
||||
int *outStrides, int *multiple);
|
||||
void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape,
|
||||
const int *inStrides, const int *outStrides, const int *multiple);
|
||||
void ComputeStrides(const int *shape, int *strides, const int ndim);
|
||||
|
||||
void CalcMultiplesAndStrides(ArithmeticParameter *param);
|
||||
|
||||
void TileOneDimensionUint8(uint8_t *inData, uint8_t *outData, int dim, size_t ndim, int *inShape, int *inStrides,
|
||||
int *outStrides, int *multiple);
|
||||
void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param);
|
||||
void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
|
||||
void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape,
|
||||
const int *inStrides, const int *outStrides, const int *multiple);
|
||||
void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
|
||||
ArithmeticParameter *param);
|
||||
void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
|
||||
ArithmeticParameter *param);
|
||||
void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
|
||||
void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
|
||||
ArithmeticParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -16,6 +16,6 @@
|
|||
#include "nnacl/flatten.h"
|
||||
#include <string.h>
|
||||
|
||||
void Flatten(const void *input, void *output, FlattenParameter *flatten_param) {
|
||||
void Flatten(const void *input, void *output, const FlattenParameter *flatten_param) {
|
||||
memcpy(output, input, flatten_param->size);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ typedef struct FlattenParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Flatten(const void *input, void *output, FlattenParameter *flatten_param);
|
||||
void Flatten(const void *input, void *output, const FlattenParameter *flatten_param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -43,7 +43,7 @@ int ArgCompareDescFp32(const void *a, const void *b) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
|
||||
for (int j = 0; j < in_shape[0]; ++j) {
|
||||
size_t offset = param->in_strides_[0] * j + i;
|
||||
|
@ -58,7 +58,7 @@ void ArgMaxDim0OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
|
||||
for (int j = 0; j < in_shape[0]; ++j) {
|
||||
size_t offset = param->in_strides_[0] * j + i;
|
||||
|
@ -73,7 +73,7 @@ void ArgMaxDim0OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
|
||||
for (int j = 0; j < in_shape[0]; ++j) {
|
||||
size_t offset = param->in_strides_[0] * j + i;
|
||||
|
@ -88,7 +88,7 @@ void ArgMinDim0OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
|
||||
for (int j = 0; j < in_shape[0]; ++j) {
|
||||
size_t offset = param->in_strides_[0] * j + i;
|
||||
|
@ -103,7 +103,7 @@ void ArgMinDim0OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
size_t in_dim0_offset = i * param->in_strides_[0];
|
||||
|
@ -123,7 +123,7 @@ void ArgMaxDim1OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
size_t in_dim0_offset = i * param->in_strides_[0];
|
||||
|
@ -143,7 +143,7 @@ void ArgMaxDim1OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
size_t in_dim0_offset = i * param->in_strides_[0];
|
||||
|
@ -163,7 +163,7 @@ void ArgMinDim1OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
size_t in_dim0_offset = i * param->in_strides_[0];
|
||||
|
@ -183,7 +183,7 @@ void ArgMinDim1OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
|
@ -208,7 +208,7 @@ void ArgMaxDim2OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
|
@ -233,7 +233,7 @@ void ArgMaxDim2OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
|
@ -258,7 +258,7 @@ void ArgMinDim2OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
for (int i = 0; i < in_shape[0]; ++i) {
|
||||
|
@ -283,7 +283,7 @@ void ArgMinDim2OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
int in_shape3 = in_shape[3];
|
||||
|
@ -311,7 +311,7 @@ void ArgMaxDim3OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
int in_shape3 = in_shape[3];
|
||||
|
@ -339,7 +339,7 @@ void ArgMaxDim3OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
int in_shape3 = in_shape[3];
|
||||
|
@ -367,7 +367,7 @@ void ArgMinDim3OutValue(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
int in_shape1 = in_shape[1];
|
||||
int in_shape2 = in_shape[2];
|
||||
int in_shape3 = in_shape[3];
|
||||
|
@ -395,7 +395,7 @@ void ArgMinDim3OutIndex(const float *input, float *output, const int *in_shape,
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMaxDim0OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -403,7 +403,7 @@ void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMinDim0OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -411,7 +411,7 @@ void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMaxDim1OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -419,7 +419,7 @@ void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMinDim1OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -427,7 +427,7 @@ void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMaxDim2OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -435,7 +435,7 @@ void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMinDim2OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -443,7 +443,7 @@ void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMaxDim3OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -451,7 +451,7 @@ void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param) {
|
||||
void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param) {
|
||||
if (param->out_value_) {
|
||||
ArgMinDim3OutValue(input, output, in_shape, param);
|
||||
} else {
|
||||
|
@ -459,7 +459,7 @@ void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMa
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
int after_axis_count) {
|
||||
bool out_value = param->out_value_;
|
||||
for (int i = 0; i < pre_axis_count; ++i) {
|
||||
|
@ -480,7 +480,7 @@ void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pr
|
|||
}
|
||||
}
|
||||
|
||||
void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
int after_axis_count) {
|
||||
bool out_value = param->out_value_;
|
||||
for (int i = 0; i < pre_axis_count; ++i) {
|
||||
|
|
|
@ -21,18 +21,18 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
void ArgMax(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
int after_axis_count);
|
||||
void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
void ArgMin(const float *input, float *output, const ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
|
||||
int after_axis_count);
|
||||
void ArgMaxDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMinDim0(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMaxDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMinDim1(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
|
||||
void ArgMaxDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMinDim0(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMaxDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMinDim1(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMaxDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMinDim2(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMaxDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
void ArgMinDim3(const float *input, float *output, const int *in_shape, const ArgMinMaxParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -615,8 +615,8 @@ int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param) {
|
||||
int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementMul(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
@ -690,21 +690,21 @@ int ElementAddInt(const int *input0, const int *input1, int *output, const int e
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementAddInt8(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
|
||||
int ElementAddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int element_size) {
|
||||
for (int i = 0; i < element_size; i++) {
|
||||
output[i] = input0[i] + input1[i];
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param) {
|
||||
int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementAdd(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
||||
int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1,
|
||||
int8_t *output, int element_size, ArithmeticParameter *param) {
|
||||
TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementAddInt8(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
@ -763,8 +763,8 @@ int ElementSubRelu6(const float *input0, const float *input1, float *output, con
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param) {
|
||||
int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementSub(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
@ -791,8 +791,8 @@ int ElementDivRelu6(const float *input0, const float *input1, float *output, con
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param) {
|
||||
int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementDiv(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
@ -811,7 +811,7 @@ int ElementFloorModInt(const int *input0, const int *input1, int *output, const
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementFloorMod(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -831,7 +831,7 @@ int ElementFloorDivInt(const int *input0, const int *input1, int *output, const
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementFloorDiv(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -862,13 +862,13 @@ int ElementSquaredDifference(const float *input0, const float *input1, float *ou
|
|||
return ElementMul(output, output, output, element_size);
|
||||
}
|
||||
|
||||
int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
|
||||
float *output, int element_size, ArithmeticParameter *param) {
|
||||
BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param);
|
||||
return ElementMul(output, output, output, element_size);
|
||||
}
|
||||
|
||||
int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementLogicalAnd(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -894,7 +894,7 @@ int ElementLogicalOr(const float *input0, const float *input1, float *output, co
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementLogicalOr(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -916,7 +916,7 @@ int ElementMaximum(const float *input0, const float *input1, float *output, cons
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementMaximum(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -938,7 +938,7 @@ int ElementMinimum(const float *input0, const float *input1, float *output, cons
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementMinimum(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -970,7 +970,7 @@ int ElementNotEqual(const float *input0, const float *input1, float *output, con
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementNotEqual(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -1002,7 +1002,7 @@ int ElementEqual(const float *input0, const float *input1, float *output, const
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementEqual(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -1026,8 +1026,8 @@ int ElementLess(const float *input0, const float *input1, float *output, const i
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param) {
|
||||
int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementLess(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
@ -1050,7 +1050,7 @@ int ElementLessEqual(const float *input0, const float *input1, float *output, co
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementLessEqual(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -1074,7 +1074,7 @@ int ElementGreater(const float *input0, const float *input1, float *output, cons
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementGreater(tile_input0, tile_input1, output, element_size);
|
||||
|
@ -1098,8 +1098,8 @@ int ElementGreaterEqual(const float *input0, const float *input1, float *output,
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param) {
|
||||
int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
|
||||
float *output, int element_size, ArithmeticParameter *param) {
|
||||
TileDimensions(input0, input1, tile_input0, tile_input1, param);
|
||||
return ElementGreaterEqual(tile_input0, tile_input1, output, element_size);
|
||||
}
|
||||
|
@ -1111,7 +1111,7 @@ int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *
|
|||
int *in_datatype, int *out_datatype, OpParameter *param) {
|
||||
*out_format = in_format[0];
|
||||
*out_datatype = in_datatype[0];
|
||||
ArithmeticParameter *arithmetic_parameter = (ArithmeticParameter *)param;
|
||||
const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param;
|
||||
int ndim0 = dim_size[0];
|
||||
int ndim1 = dim_size[1];
|
||||
int *in_shape0 = in_shape[0];
|
||||
|
|
|
@ -64,85 +64,85 @@ int ElementMulRelu6(const float *input0, const float *input1, float *output, con
|
|||
int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size);
|
||||
int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size);
|
||||
int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size);
|
||||
int BroadcastMul(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param);
|
||||
int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementAdd(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size);
|
||||
int BroadcastAdd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param);
|
||||
int BroadcastAddInt8(int8_t *input0, int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, int8_t *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1,
|
||||
int8_t *output, int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementSub(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param);
|
||||
int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementDiv(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param);
|
||||
int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastLogicalAnd(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastLogicalOr(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastMaximum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size);
|
||||
int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size);
|
||||
int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastSquaredDifference(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
|
||||
float *output, int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size);
|
||||
|
||||
int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementEqual(const float *input0, const float *input1, float *output, const int element_size);
|
||||
|
||||
int BroadcastEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementLess(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastLess(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output, int element_size,
|
||||
ArithmeticParameter *param);
|
||||
int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastLessEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementGreater(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
|
||||
int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size);
|
||||
int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
|
||||
int element_size, ArithmeticParameter *param);
|
||||
int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1,
|
||||
float *output, int element_size, ArithmeticParameter *param);
|
||||
|
||||
#ifdef ENABLE_NNACL_INFER_SHAPE
|
||||
int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
#include "nnacl/batchnorm_parameter.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
|
||||
void *output) {
|
||||
void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param,
|
||||
int task_id, void *output) {
|
||||
int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
|
||||
int completed_units = task_id * units_per_thread;
|
||||
int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
|
||||
|
@ -37,7 +37,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba
|
|||
}
|
||||
|
||||
void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
|
||||
const void *variance, BatchNormParameter *param, int task_id, void *output) {
|
||||
const void *variance, const BatchNormParameter *param, int task_id, void *output) {
|
||||
int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
|
||||
int completed_units = task_id * units_per_thread;
|
||||
int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
|
||||
|
@ -53,7 +53,7 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset
|
|||
}
|
||||
}
|
||||
|
||||
void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param,
|
||||
void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param,
|
||||
float *save_mean, float *save_var) {
|
||||
const float N = (float)param->unit_;
|
||||
const float VN = N;
|
||||
|
|
|
@ -23,12 +23,12 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
|
||||
void *output);
|
||||
void BatchNormFp32(const void *input, const void *mean, const void *variance, const BatchNormParameter *param,
|
||||
int task_id, void *output);
|
||||
void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
|
||||
const void *variance, BatchNormParameter *param, int task_id, void *output);
|
||||
const void *variance, const BatchNormParameter *param, int task_id, void *output);
|
||||
|
||||
void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param,
|
||||
void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, const BatchNormParameter *param,
|
||||
float *save_mean, float *save_var);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include "nnacl/fp32/concat_fp32.h"
|
||||
#include <string.h>
|
||||
|
||||
void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
|
||||
int task_id, int thread_num) {
|
||||
void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size,
|
||||
void *output, int task_id, int thread_num) {
|
||||
int before_axis_size = 1;
|
||||
for (int i = 0; i < axis; ++i) {
|
||||
before_axis_size *= inputs_output_shape[0][i];
|
||||
|
@ -32,12 +32,12 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si
|
|||
uint8_t *dst_base = (output);
|
||||
size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
|
||||
for (int i = 0; i < input_num; ++i) {
|
||||
uint8_t *src_base = (input[i]);
|
||||
const uint8_t *src_base = (input[i]);
|
||||
size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
|
||||
int offset = UP_DIV(input_stride, thread_num);
|
||||
int count = MSMIN(offset, input_stride - offset * task_id);
|
||||
for (int j = 0; j < before_axis_size; j++) {
|
||||
uint8_t *src = src_base + j * input_stride + task_id * offset;
|
||||
const uint8_t *src = src_base + j * input_stride + task_id * offset;
|
||||
uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size + task_id * offset;
|
||||
memcpy(dst, src, count);
|
||||
}
|
||||
|
|
|
@ -22,8 +22,8 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
|
||||
int task_id, int thread_num);
|
||||
void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size,
|
||||
void *output, int task_id, int thread_num);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "nnacl/fp32/constant_of_shape_fp32.h"
|
||||
|
||||
int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) {
|
||||
int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param) {
|
||||
int size = param->unit_;
|
||||
float data = param->value_;
|
||||
int ind_st = MSMIN(tid * size, param->element_sz_);
|
||||
|
@ -27,7 +27,7 @@ int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param) {
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param) {
|
||||
int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param) {
|
||||
int size = param->unit_;
|
||||
float data = param->value_;
|
||||
int ind_st = MSMIN(tid * size, param->element_sz_);
|
||||
|
|
|
@ -33,8 +33,8 @@ typedef struct ConstantOfShapeParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int ConstantOfShape(float *output, int tid, ConstantOfShapeParameter *param);
|
||||
int ConstantOfShapeInt(int32_t *output, int tid, ConstantOfShapeParameter *param);
|
||||
int ConstantOfShape(float *output, int tid, const ConstantOfShapeParameter *param);
|
||||
int ConstantOfShapeInt(int32_t *output, int tid, const ConstantOfShapeParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
// fp32 conv common
|
||||
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
|
||||
float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param) {
|
||||
float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param) {
|
||||
int out_channel = conv_param->output_channel_;
|
||||
int deep = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
|
||||
int output_count = conv_param->output_h_ * conv_param->output_w_;
|
||||
|
@ -61,8 +61,8 @@ void ConvFp32(const float *input_data, float *packed_input, const float *packed_
|
|||
|
||||
// fp32 conv winograd
|
||||
void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data,
|
||||
TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func,
|
||||
OutputTransFunc out_func) {
|
||||
TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param,
|
||||
InputTransFunc in_func, OutputTransFunc out_func) {
|
||||
int in_channel = conv_param->input_channel_;
|
||||
int out_w_block = UP_DIV(conv_param->output_w_, conv_param->output_unit_);
|
||||
int out_h_block = UP_DIV(conv_param->output_h_, conv_param->output_unit_);
|
||||
|
|
|
@ -35,12 +35,12 @@ extern "C" {
|
|||
|
||||
// fp32 convolution common (im2col+gemm)
|
||||
void ConvFp32(const float *input_data, float *packed_input, const float *packed_weight, const float *bias_data,
|
||||
float *col_major_input, float *output_data, int task_id, ConvParameter *conv_param);
|
||||
float *col_major_input, float *output_data, int task_id, const ConvParameter *conv_param);
|
||||
|
||||
// fp32 convolution winograd
|
||||
void ConvWinogardFp32(const float *input_data, const float *trans_weight, const float *bias_data, float *output_data,
|
||||
TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, InputTransFunc in_func,
|
||||
OutputTransFunc out_func);
|
||||
TmpBufferAddress *buffer_list, int task_id, const ConvParameter *conv_param,
|
||||
InputTransFunc in_func, OutputTransFunc out_func);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/crop_parameter.h"
|
||||
|
||||
void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
|
||||
void Pad4DOffset(const CropParameter *crop_param, int64_t *offset) {
|
||||
int axis = crop_param->axis_;
|
||||
for (int i = DIMENSION_4D - 1; i >= 0; --i) {
|
||||
int offset_index = i - axis;
|
||||
|
@ -30,8 +30,8 @@ void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
|
|||
}
|
||||
}
|
||||
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param,
|
||||
int thread_id) {
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape,
|
||||
const CropParameter *crop_param, int thread_id) {
|
||||
int64_t offset_pad[DIMENSION_4D];
|
||||
Pad4DOffset(crop_param, offset_pad);
|
||||
int out_shape1 = out_shape[1];
|
||||
|
@ -66,7 +66,7 @@ void Crop4D(const float *input, float *output, const int *in_shape, const int *o
|
|||
}
|
||||
|
||||
void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
|
||||
CropParameter *crop_param) {
|
||||
const CropParameter *crop_param) {
|
||||
int64_t offset_pad[DIMENSION_4D];
|
||||
Pad4DOffset(crop_param, offset_pad);
|
||||
size_t in_dim2_stride = in_shape[3];
|
||||
|
|
|
@ -23,10 +23,10 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param,
|
||||
int thread_id);
|
||||
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape,
|
||||
const CropParameter *crop_param, int thread_id);
|
||||
void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
|
||||
CropParameter *crop_param);
|
||||
const CropParameter *crop_param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -34,7 +34,7 @@ void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, in
|
|||
}
|
||||
|
||||
void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *dst, int output_channel,
|
||||
ConvParameter *conv_param) {
|
||||
const ConvParameter *conv_param) {
|
||||
/* arm64 row12x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */
|
||||
/* arm32 row4x8-major(ih*iw x oc*kh*kw) -> row8-major(oh*ow x oc) */
|
||||
size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
|
||||
|
|
|
@ -30,7 +30,7 @@ extern "C" {
|
|||
#endif
|
||||
void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, int plane);
|
||||
void DeConvPostFp32C8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel,
|
||||
ConvParameter *conv_param);
|
||||
const ConvParameter *conv_param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
#include "nnacl/fp32/deconv_winograd_fp32.h"
|
||||
|
||||
int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
|
||||
DeConvParam *deconv_param) {
|
||||
int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param,
|
||||
const DeConvParam *deconv_param) {
|
||||
int tmp_kernel_plane = unit->w_size_ * unit->h_size_;
|
||||
int size = conv_param->input_channel_ * conv_param->output_channel_ * tmp_kernel_plane;
|
||||
float *current_unit_weight = (float *)malloc(size * sizeof(float));
|
||||
|
@ -25,13 +25,14 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame
|
|||
return NNACL_NULL_PTR;
|
||||
}
|
||||
for (int ic = 0; ic < conv_param->input_channel_; ic++) {
|
||||
float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic;
|
||||
const float *src_ic = nhwc_weight + deconv_param->kernel_plane_ * conv_param->output_channel_ * ic;
|
||||
float *dst_ic = current_unit_weight + tmp_kernel_plane * conv_param->output_channel_ * ic;
|
||||
for (int uhi = 0; uhi < unit->h_size_; uhi++) {
|
||||
for (int uwi = 0; uwi < unit->w_size_; uwi++) {
|
||||
int src_h_offset = unit->h_start_ + uhi * conv_param->stride_h_;
|
||||
int src_w_offset = unit->w_start_ + uwi * conv_param->stride_w_;
|
||||
float *src_hw = src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_;
|
||||
const float *src_hw =
|
||||
src_ic + (src_h_offset * conv_param->kernel_w_ + src_w_offset) * conv_param->output_channel_;
|
||||
float *dst_hw = dst_ic + (uhi * unit->w_size_ + uwi) * conv_param->output_channel_;
|
||||
memcpy(dst_hw, src_hw, conv_param->output_channel_ * sizeof(float));
|
||||
}
|
||||
|
@ -132,10 +133,10 @@ int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParame
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
void DeConvWgInputPack(float *src_ptr, float *dst_ptr, int channel, int stride) {
|
||||
void DeConvWgInputPack(const float *src_ptr, float *dst_ptr, int channel, int stride) {
|
||||
int ic4div = channel / C4NUM;
|
||||
int ic4mod = channel % C4NUM;
|
||||
float *src = src_ptr;
|
||||
const float *src = src_ptr;
|
||||
float *dst = dst_ptr;
|
||||
|
||||
for (int ic = 0; ic < ic4div; ic++) {
|
||||
|
@ -340,9 +341,10 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
|
|||
return;
|
||||
}
|
||||
|
||||
void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf, float *tmp_buf, const float *at_buf,
|
||||
float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf, float *b_tmp_buf,
|
||||
int unit_size, int w_start, int h_start, ConvParameter *conv_param, DeConvParam *deconv_param) {
|
||||
void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, const float *weight_buf, float *tmp_buf,
|
||||
const float *at_buf, float *a_mid_buf, float *trans_a_buf, bool *transfered, const float *bt_buf,
|
||||
float *b_tmp_buf, int unit_size, int w_start, int h_start, const ConvParameter *conv_param,
|
||||
const DeConvParam *deconv_param) {
|
||||
int winograd_plane = unit_size * unit_size;
|
||||
if (!transfered[unit_size]) {
|
||||
WinogradTransLeft(tile_in, at_buf, a_mid_buf, DECONV_WINOGRAD_DEFAULT_UNIT, unit_size, DECONV_WINOGRAD_DEFAULT_UNIT,
|
||||
|
@ -355,7 +357,7 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf,
|
|||
for (int index = 0; index < winograd_plane; index++) {
|
||||
float *src = trans_a_buf + index * DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
|
||||
float *dst = tmp_buf + index * deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
|
||||
float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_;
|
||||
const float *weight = weight_buf + index * deconv_param->ic_up4_ * deconv_param->oc_up4_;
|
||||
TiledC4MatmulFp32(dst, src, weight, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM, deconv_param->ic_div4_,
|
||||
deconv_param->oc_div4_);
|
||||
}
|
||||
|
@ -380,15 +382,16 @@ void DeConvWgCalWgFp32(const float *tile_in, float *tile_out, float *weight_buf,
|
|||
return;
|
||||
}
|
||||
|
||||
void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start, int w_start,
|
||||
int h_size, int w_size, ConvParameter *conv_param, DeConvParam *deconv_param) {
|
||||
void DeConvWgCalCommFp32(const float *tile_in, float *tile_out, const float *weight, float *tmp_buf, int h_start,
|
||||
int w_start, int h_size, int w_size, const ConvParameter *conv_param,
|
||||
const DeConvParam *deconv_param) {
|
||||
int count = deconv_param->oc_div4_ * w_size * h_size;
|
||||
int in_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->ic_up4_;
|
||||
int out_stride = DECONV_WINOGRAD_DEFAULT_TILE * deconv_param->oc_up4_;
|
||||
|
||||
for (int hi = 0; hi < DECONV_WINOGRAD_DEFAULT_UNIT; hi++) {
|
||||
for (int wi = 0; wi < DECONV_WINOGRAD_DEFAULT_UNIT; wi++) {
|
||||
float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride;
|
||||
const float *src_in = tile_in + (wi + hi * DECONV_WINOGRAD_DEFAULT_UNIT) * in_stride;
|
||||
TiledC4MatmulFp32(tmp_buf, src_in, weight, DECONV_WINOGRAD_DEFAULT_TILE * 4, deconv_param->ic_div4_, count);
|
||||
|
||||
for (int uhi = 0; uhi < h_size; uhi++) {
|
||||
|
@ -406,8 +409,8 @@ void DeConvWgCalCommFp32(float *tile_in, float *tile_out, const float *weight, f
|
|||
return;
|
||||
}
|
||||
|
||||
void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
|
||||
ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
|
||||
void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
|
||||
const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) {
|
||||
/* pack tile input */
|
||||
int tile_in_unit_stride = deconv_param->ic_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
|
||||
#ifdef ENABLE_ARM
|
||||
|
@ -439,7 +442,7 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind
|
|||
continue;
|
||||
}
|
||||
|
||||
float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_;
|
||||
const float *src = nhwc_input_ + (w_index + h_index * conv_param->input_w_) * conv_param->input_channel_;
|
||||
DeConvWgInputPack(src, dst, conv_param->input_channel_, DECONV_WINOGRAD_DEFAULT_TILE * C4NUM);
|
||||
}
|
||||
}
|
||||
|
@ -474,8 +477,8 @@ void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_ind
|
|||
return;
|
||||
}
|
||||
|
||||
void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param,
|
||||
int calculate_count, int tile_index) {
|
||||
void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param,
|
||||
const DeConvParam *deconv_param, int calculate_count, int tile_index) {
|
||||
/* merge */
|
||||
int src_unit_stride = deconv_param->oc_up4_ * DECONV_WINOGRAD_DEFAULT_TILE;
|
||||
|
||||
|
@ -483,7 +486,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par
|
|||
int dst_stride = conv_param->output_w_ * conv_param->output_h_ * C4NUM;
|
||||
|
||||
for (int index = 0; index < calculate_count; ++index) {
|
||||
float *src_start = tile_out + index * C4NUM;
|
||||
const float *src_start = tile_out + index * C4NUM;
|
||||
|
||||
int plane_index = tile_index * DECONV_WINOGRAD_DEFAULT_TILE + index;
|
||||
int w_unit_index = plane_index % deconv_param->in_tile_w_count_;
|
||||
|
@ -499,7 +502,7 @@ void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_par
|
|||
|
||||
for (int hi = merge_h_start; hi < merge_h_end; hi++) {
|
||||
for (int wi = merge_w_start; wi < merge_w_end; wi++) {
|
||||
float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride;
|
||||
const float *src = src_start + (hi * deconv_param->out_tile_w_ + wi) * src_unit_stride;
|
||||
float *dst = dst_start + (hi * conv_param->output_w_ + wi) * C4NUM;
|
||||
DeConvWgMerge(src, dst, src_stride, dst_stride, deconv_param->oc_div4_);
|
||||
}
|
||||
|
|
|
@ -28,12 +28,12 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int PackDeConvWgDataFp32(float *nhwc_weight, DeConvComputeUnit *unit, ConvParameter *conv_param,
|
||||
DeConvParam *deconv_param);
|
||||
void DeconvWg(float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
|
||||
ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
|
||||
void DeconvWgPost(float *tile_out, float *nc4hw4_output, ConvParameter *conv_param, DeConvParam *deconv_param,
|
||||
int calculate_count, int tile_index);
|
||||
int PackDeConvWgDataFp32(const float *nhwc_weight, DeConvComputeUnit *unit, const ConvParameter *conv_param,
|
||||
const DeConvParam *deconv_param);
|
||||
void DeconvWg(const float *nhwc_input_, float *tile_in, float *tile_out, int start_index, int calculate_count,
|
||||
const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id);
|
||||
void DeconvWgPost(const float *tile_out, float *nc4hw4_output, const ConvParameter *conv_param,
|
||||
const DeConvParam *deconv_param, int calculate_count, int tile_index);
|
||||
void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t ic4, size_t cal_num, size_t oc4);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -36,8 +36,8 @@ float IntersectionOverUnion(const BboxCorner *a, const BboxCorner *b) {
|
|||
return inter / (area_a + area_b - inter);
|
||||
}
|
||||
|
||||
int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors,
|
||||
DetectionPostProcessParameter *param) {
|
||||
int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors,
|
||||
const DetectionPostProcessParameter *param) {
|
||||
if (input_boxes == NULL || anchors == NULL || param == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
|
|
@ -37,8 +37,8 @@ typedef struct {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int DecodeBoxes(const int num_boxes, const float *input_boxes, const float *anchors,
|
||||
DetectionPostProcessParameter *param);
|
||||
int DecodeBoxes(int num_boxes, const float *input_boxes, const float *anchors,
|
||||
const DetectionPostProcessParameter *param);
|
||||
|
||||
int NmsMultiClassesFastCore(const int num_boxes, const int num_classes_with_bg, const float *input_scores,
|
||||
void (*)(const float *, int *, int, int), const DetectionPostProcessParameter *param,
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
#include <math.h>
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
void Calculate_Data(const float *input_data, float *output_data, int num, EluParameter *parameter) {
|
||||
void Calculate_Data(const float *input_data, float *output_data, int num, const EluParameter *parameter) {
|
||||
output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num];
|
||||
}
|
||||
|
||||
int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id) {
|
||||
int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id) {
|
||||
for (size_t i = task_id; i < parameter->in_size_; i += parameter->op_parameter_.thread_num_) {
|
||||
Calculate_Data(input_data, output_data, i, parameter);
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ typedef struct EluParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int Elu(const float *input_data, float *output_data, EluParameter *parameter, int task_id);
|
||||
int Elu(const float *input_data, float *output_data, const EluParameter *parameter, int task_id);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -31,7 +31,8 @@ void l2_regulate(float *data, int size, float max_norm) {
|
|||
return;
|
||||
}
|
||||
|
||||
int CopyData(float *input_data, int *ids, float *output_data, int num, EmbeddingLookupParameter *parameter) {
|
||||
int CopyData(float *input_data, const int *ids, float *output_data, int num,
|
||||
const EmbeddingLookupParameter *parameter) {
|
||||
if (ids[num] >= parameter->layer_num_ || ids[num] < 0) {
|
||||
return NNACL_ERRCODE_INDEX_OUT_OF_RANGE;
|
||||
}
|
||||
|
@ -46,7 +47,8 @@ int CopyData(float *input_data, int *ids, float *output_data, int num, Embedding
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id) {
|
||||
int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter,
|
||||
int task_id) {
|
||||
for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
|
||||
int ret = CopyData(input_data, ids, output_data, i, parameter);
|
||||
if (ret != NNACL_OK) {
|
||||
|
|
|
@ -31,7 +31,8 @@ typedef struct EmbeddingLookupParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id);
|
||||
int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const EmbeddingLookupParameter *parameter,
|
||||
int task_id);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <string.h>
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id) {
|
||||
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id) {
|
||||
if (parameter->scale_ == 1) {
|
||||
for (size_t i = task_id; i < parameter->element_num_; i += parameter->thread_num_) {
|
||||
output_data[i] = expf(input_data[i]);
|
||||
|
|
|
@ -33,7 +33,7 @@ typedef struct ExpParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int Exp(const float *input_data, float *output_data, ExpParameter *parameter, int task_id);
|
||||
int Exp(const float *input_data, float *output_data, const ExpParameter *parameter, int task_id);
|
||||
void ExpFp32(const float *src, float *dst, int num);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -26,10 +26,10 @@ inline int Stride(const int *shape, int rank, int index) {
|
|||
return stride;
|
||||
}
|
||||
|
||||
int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
|
||||
int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
|
||||
float *output) {
|
||||
for (int m = 0; m < outer_size; ++m) {
|
||||
float *inputm = input + inner_size * m * limit;
|
||||
const float *inputm = input + inner_size * m * limit;
|
||||
float *outputm = output + inner_size * m * indices_element_size;
|
||||
for (int i = 0; i < indices_element_size; ++i) {
|
||||
if (indices[i] < 0 || indices[i] > limit) {
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int Gather(float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
|
||||
int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
|
||||
float *output);
|
||||
int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices,
|
||||
int indices_element_size, int32_t *output);
|
||||
|
|
|
@ -18,9 +18,8 @@
|
|||
#include "nnacl/errorcode.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
|
||||
const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
|
||||
const int thread_num) {
|
||||
int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data,
|
||||
const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
|
|
@ -23,9 +23,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
|
||||
const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
|
||||
const int thread_num);
|
||||
int InstanceNorm(int outer_size, int inner_size, const float *src_data, const float *scale_data, const float *bias_data,
|
||||
const InstanceNormParameter *param, float *dst_data, int task_id, int thread_num);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -18,9 +18,8 @@
|
|||
#include "nnacl/errorcode.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data,
|
||||
const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid,
|
||||
const int thread_num) {
|
||||
int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data,
|
||||
bool affine, float epsilon, float *dst_data, int tid, int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
|
|
@ -23,9 +23,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int LayerNorm(const int outer_size, const int inner_size, const float *src_data, const float *gamma_data,
|
||||
const float *beta_data, const bool affine, const float epsilon, float *dst_data, const int tid,
|
||||
const int thread_num);
|
||||
int LayerNorm(int outer_size, int inner_size, const float *src_data, const float *gamma_data, const float *beta_data,
|
||||
bool affine, float epsilon, float *dst_data, int tid, int thread_num);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -17,15 +17,15 @@
|
|||
#include "nnacl/fp32/local_response_norm_fp32.h"
|
||||
#include <math.h>
|
||||
|
||||
int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr,
|
||||
LocalResponseNormParameter *param) {
|
||||
int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr,
|
||||
const LocalResponseNormParameter *param) {
|
||||
int depth_radius = param->depth_radius_;
|
||||
float bias = param->bias_;
|
||||
float alpha = param->alpha_;
|
||||
float beta = param->beta_;
|
||||
|
||||
for (int i = 0; i < out_size; i++) {
|
||||
float *in_data = input_ptr + i * channel;
|
||||
const float *in_data = input_ptr + i * channel;
|
||||
float *out_data = output_ptr + i * channel;
|
||||
|
||||
for (int j = 0; j < channel; j++) {
|
||||
|
|
|
@ -30,8 +30,8 @@ typedef struct LocalResponseNormParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr,
|
||||
LocalResponseNormParameter *param);
|
||||
int LocalResponseNorm(const float *input_ptr, int out_size, int channel, float *output_ptr,
|
||||
const LocalResponseNormParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "nnacl/fp32/activation_fp32.h"
|
||||
#include "nnacl/fp32/arithmetic_fp32.h"
|
||||
|
||||
void InitGate(float *gate_buffer, const float *bias, LstmParameter *lstm_parm) {
|
||||
void InitGate(float *gate_buffer, const float *bias, const LstmParameter *lstm_parm) {
|
||||
int gate_offest = 0;
|
||||
for (int l = 0; l < 4; l++) {
|
||||
int batch_offest = gate_offest;
|
||||
|
@ -94,7 +94,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we
|
|||
const float *input_cell_weight, const float *input_output_weight, const float *state_input_weight,
|
||||
const float *state_forget_weight, const float *state_cell_weight, const float *state_output_weight,
|
||||
const float *bias, float *hidden_state, float *cell_state, float *gate_buffer,
|
||||
LstmParameter *lstm_parm) {
|
||||
const LstmParameter *lstm_parm) {
|
||||
InitGate(gate_buffer, bias, lstm_parm);
|
||||
|
||||
float *input_gate = gate_buffer;
|
||||
|
@ -139,7 +139,7 @@ void LstmStepUnit(float *output, const float *input, const float *input_input_we
|
|||
}
|
||||
|
||||
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias,
|
||||
float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm) {
|
||||
float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm) {
|
||||
// forward
|
||||
const float *input_input_weight = weight_i;
|
||||
const float *input_forget_weight = weight_i + lstm_parm->input_size_ * lstm_parm->hidden_size_ * 2;
|
||||
|
|
|
@ -34,7 +34,7 @@ typedef struct LstmParameter {
|
|||
extern "C" {
|
||||
#endif
|
||||
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias,
|
||||
float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm);
|
||||
float *hidden_state, float *cell_state, float *gate_buffer, const LstmParameter *lstm_parm);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include "nnacl/common_func.h"
|
||||
|
||||
void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
|
||||
const int *paddings, const int tid, const int thread_num) {
|
||||
const int *paddings, int tid, int thread_num) {
|
||||
int in[4], out[4];
|
||||
for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
|
||||
out[0] = in[0] + paddings[0];
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
void Pad(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
|
||||
const int *paddings, const int tid, const int thread_num);
|
||||
const int *paddings, int tid, int thread_num);
|
||||
void MirrorPad(const float *input_data, float *output_data, const int *input_shape, const PadParameter *pad_param,
|
||||
int begin, int end);
|
||||
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
#include <float.h>
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
|
||||
float maxf) {
|
||||
int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
|
||||
float minf, float maxf) {
|
||||
int win_w = pooling_param->window_w_;
|
||||
int win_h = pooling_param->window_h_;
|
||||
int channel = pooling_param->input_channel_;
|
||||
|
@ -144,8 +144,8 @@ int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pool
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
|
||||
float maxf) {
|
||||
void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
|
||||
float minf, float maxf) {
|
||||
int win_w = pooling_param->window_w_;
|
||||
int win_h = pooling_param->window_h_;
|
||||
int channel = pooling_param->input_channel_;
|
||||
|
|
|
@ -27,10 +27,10 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int AvgPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
|
||||
float maxf);
|
||||
void MaxPooling(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id, float minf,
|
||||
float maxf);
|
||||
int AvgPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
|
||||
float minf, float maxf);
|
||||
void MaxPooling(const float *input_ptr, float *output_ptr, const PoolingParameter *pooling_param, int task_id,
|
||||
float minf, float maxf);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id) {
|
||||
void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id) {
|
||||
float *negetive_slope_value = prelu_param_->slope_;
|
||||
int c4 = prelu_param_->channel_num_ / C4NUM;
|
||||
int channel_num = prelu_param_->channel_num_;
|
||||
|
@ -81,7 +81,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
|
|||
int c4_offset = tile_offset + k * C4NUM;
|
||||
int slope_offset = k * C4NUM;
|
||||
for (int l = 0; l < C4NUM; ++l) {
|
||||
float in_data = input_ptr[c4_offset + l];
|
||||
const float in_data = input_ptr[c4_offset + l];
|
||||
output_ptr[c4_offset + l] =
|
||||
(in_data < 0 ? in_data : 0) * negetive_slope_value[slope_offset + l] + (in_data > 0 ? in_data : 0);
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
|
|||
int offset = m * channel_num;
|
||||
for (int k = c_s; k < channel_num; ++k) {
|
||||
int c4_offset = offset + k;
|
||||
float in_data = input_ptr[c4_offset];
|
||||
const float in_data = input_ptr[c4_offset];
|
||||
if (in_data >= 0) {
|
||||
output_ptr[c4_offset] = in_data;
|
||||
} else {
|
||||
|
@ -104,7 +104,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
|
|||
}
|
||||
}
|
||||
|
||||
void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id) {
|
||||
void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id) {
|
||||
for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) {
|
||||
int cal_index;
|
||||
#ifdef ENABLE_NEON
|
||||
|
|
|
@ -22,9 +22,9 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_id);
|
||||
void PRelu(float *input, float *output, const PReluParameter *prelu_param_, int task_id);
|
||||
|
||||
void PReluShareChannel(float *input, float *output, PReluParameter *prelu_param_, int task_id);
|
||||
void PReluShareChannel(float *input, float *output, const PReluParameter *prelu_param_, int task_id);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
#include "nnacl/reduce_parameter.h"
|
||||
#endif
|
||||
|
||||
int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -44,8 +44,8 @@ int ReduceMean(const int outer_size, const int inner_size, const int axis_size,
|
|||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -81,8 +81,8 @@ int ReduceSum(const int outer_size, const int inner_size, const int axis_size, c
|
|||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -102,8 +102,8 @@ int ReduceMax(const int outer_size, const int inner_size, const int axis_size, c
|
|||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -123,8 +123,8 @@ int ReduceMin(const int outer_size, const int inner_size, const int axis_size, c
|
|||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -144,8 +144,8 @@ int IntReduceMin(const int outer_size, const int inner_size, const int axis_size
|
|||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -166,8 +166,8 @@ int ReduceProd(const int outer_size, const int inner_size, const int axis_size,
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
|
||||
const int tid, const int thread_num) {
|
||||
int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
@ -190,8 +190,8 @@ int IntReduceProd(const int outer_size, const int inner_size, const int axis_siz
|
|||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
|
||||
float *dst_data, const int tid, const int thread_num) {
|
||||
int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
|
|
|
@ -22,22 +22,22 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int ReduceMean(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int ReduceSum(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int ReduceMax(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int ReduceMin(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int IntReduceMin(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int ReduceProd(const int outer_size, const int inner_size, const int axis_size, const float *src_data, float *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int IntReduceProd(const int outer_size, const int inner_size, const int axis_size, const int *src_data, int *dst_data,
|
||||
const int tid, const int thread_num);
|
||||
int ReduceSumSquare(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
|
||||
float *dst_data, const int tid, const int thread_num);
|
||||
int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num);
|
||||
int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num);
|
||||
int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num);
|
||||
int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num);
|
||||
int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
|
||||
int thread_num);
|
||||
int ReduceProd(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num);
|
||||
int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
|
||||
int thread_num);
|
||||
int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
|
||||
int thread_num);
|
||||
|
||||
#ifdef ENABLE_NNACL_INFER_SHAPE
|
||||
int ReduceInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
|
||||
|
|
|
@ -20,7 +20,8 @@
|
|||
#include "nnacl/errorcode.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param) {
|
||||
int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid,
|
||||
const ROIPoolingParameter *param) {
|
||||
int num_rois = param->output_n_;
|
||||
int units = UP_DIV(num_rois, param->thread_num_);
|
||||
int roi_st = tid * units;
|
||||
|
@ -52,7 +53,7 @@ int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, in
|
|||
|
||||
float bin_size_h = (float)roi_height / (float)pooled_height;
|
||||
float bin_size_w = (float)roi_width / (float)pooled_width;
|
||||
float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind;
|
||||
const float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind;
|
||||
|
||||
for (int ph = 0; ph < pooled_height; ++ph) {
|
||||
for (int pw = 0; pw < pooled_width; ++pw) {
|
||||
|
|
|
@ -40,7 +40,8 @@ typedef struct ROIPoolingParameter {
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int ROIPooling(float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid, ROIPoolingParameter *param);
|
||||
int ROIPooling(const float *in_ptr, float *out_ptr, const float *roi, float *max_c, int tid,
|
||||
const ROIPoolingParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -66,7 +66,7 @@ void ScaleAxis(const float *in_data, float *out_data, const float *scale, const
|
|||
}
|
||||
|
||||
void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
|
||||
ScaleParameter *scale_param) {
|
||||
const ScaleParameter *scale_param) {
|
||||
int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
|
||||
int outer_start = task_id * outer_step;
|
||||
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
||||
|
@ -137,7 +137,7 @@ void ScaleAxisRelu(const float *in_data, float *out_data, const float *scale, co
|
|||
}
|
||||
|
||||
void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
|
||||
ScaleParameter *scale_param) {
|
||||
const ScaleParameter *scale_param) {
|
||||
int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
|
||||
int outer_start = task_id * outer_step;
|
||||
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
||||
|
@ -210,7 +210,7 @@ void ScaleAxisRelu6(const float *in_data, float *out_data, const float *scale, c
|
|||
}
|
||||
|
||||
void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
|
||||
ScaleParameter *scale_param) {
|
||||
const ScaleParameter *scale_param) {
|
||||
int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
|
||||
int outer_start = task_id * outer_step;
|
||||
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
||||
|
|
|
@ -23,11 +23,11 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
void DoScale(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
|
||||
ScaleParameter *scale_param);
|
||||
const ScaleParameter *scale_param);
|
||||
void DoScaleRelu(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
|
||||
ScaleParameter *scale_param);
|
||||
const ScaleParameter *scale_param);
|
||||
void DoScaleRelu6(const float *in_data, float *out_data, const float *scale, const float *offset, int task_id,
|
||||
ScaleParameter *scale_param);
|
||||
const ScaleParameter *scale_param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -46,7 +46,7 @@ void PadSliceParameterTo4D(SliceParameter *param) {
|
|||
param->param_length_ = DIMENSION_4D;
|
||||
}
|
||||
|
||||
void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id) {
|
||||
void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id) {
|
||||
int32_t out_dim1 = param->size_[1];
|
||||
int32_t out_dim2 = param->size_[2];
|
||||
int32_t out_dim3 = param->size_[3];
|
||||
|
@ -78,7 +78,7 @@ void DoSlice(const float *input, float *output, SliceParameter *param, int threa
|
|||
}
|
||||
}
|
||||
|
||||
void DoSliceNoParallel(const float *input, float *output, SliceParameter *param) {
|
||||
void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param) {
|
||||
size_t copy_size = param->size_[3] * sizeof(float);
|
||||
size_t in_stride2 = param->shape_[3];
|
||||
size_t in_stride1 = param->shape_[2] * in_stride2;
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
void PadSliceParameterTo4D(SliceParameter *param);
|
||||
void DoSlice(const float *input, float *output, SliceParameter *param, int thread_id);
|
||||
void DoSliceNoParallel(const float *input, float *output, SliceParameter *param);
|
||||
void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id);
|
||||
void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -88,10 +88,10 @@ void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel) {
|
|||
}
|
||||
|
||||
// output = exp(input) / reduce_sum(exp(input), axis)
|
||||
void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter) {
|
||||
void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter) {
|
||||
int axis = parameter->axis_;
|
||||
int n_dim = parameter->n_dim_;
|
||||
int *input_shape = parameter->input_shape_;
|
||||
const int *input_shape = parameter->input_shape_;
|
||||
int inner_size = 1;
|
||||
int outter_size = 1;
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, SoftmaxParameter *parameter);
|
||||
void Softmax(const float *input_ptr, float *output_ptr, float *sum_data, const SoftmaxParameter *parameter);
|
||||
void SoftmaxLastAxis(const float *src, float *dst, int batch, int channel);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -214,7 +214,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, int32_t *filt
|
|||
return;
|
||||
}
|
||||
|
||||
void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num,
|
||||
void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num,
|
||||
int block_index) {
|
||||
// input format : nhwc
|
||||
int kernel_h = conv_param->kernel_h_;
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Im2ColPackUnitFp32(const float *input_data, ConvParameter *conv_param, float *packed_input, int real_cal_num,
|
||||
void Im2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, int real_cal_num,
|
||||
int block_index);
|
||||
|
||||
void PackHWCToWHC(const float *src, float *dst, int height, int width, int channel);
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
|
||||
// fp32 conv winograd
|
||||
void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num,
|
||||
int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func) {
|
||||
int out_tile_index, int out_w_block_num, const ConvParameter *conv_param,
|
||||
InputTransFunc func) {
|
||||
int input_unit = conv_param->input_unit_;
|
||||
int output_unit = conv_param->output_unit_;
|
||||
int in_channel = conv_param->input_channel_;
|
||||
|
@ -96,7 +97,8 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
|
|||
}
|
||||
|
||||
void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num,
|
||||
int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func) {
|
||||
int out_tile_index, int output_unit_num, const ConvParameter *conv_param,
|
||||
OutputTransFunc func) {
|
||||
int output_unit = conv_param->output_unit_;
|
||||
int output_w = conv_param->output_w_;
|
||||
int output_h = conv_param->output_h_;
|
||||
|
|
|
@ -33,10 +33,12 @@ extern "C" {
|
|||
#endif
|
||||
// for fp32 winograd input/output transform
|
||||
void WinogradInputTransform(const float *input_data, float *trans_input, float *tmp_data, int cal_num,
|
||||
int out_tile_index, int out_w_block_num, ConvParameter *conv_param, InputTransFunc func);
|
||||
int out_tile_index, int out_w_block_num, const ConvParameter *conv_param,
|
||||
InputTransFunc func);
|
||||
|
||||
void WinogradOutputTransform(const float *gemm_out, float *out_data, const float *bias_data, int cal_num,
|
||||
int out_tile_index, int output_unit_num, ConvParameter *conv_param, OutputTransFunc func);
|
||||
int out_tile_index, int output_unit_num, const ConvParameter *conv_param,
|
||||
OutputTransFunc func);
|
||||
|
||||
// for int8 convolution 3x3 filter/input/output transform
|
||||
void Conv3x3Int8InputUnit(int16_t *tmp_data, int16_t *trans_input_data, size_t step, int input_zp);
|
||||
|
|
|
@ -46,8 +46,8 @@ int ConcatCPUKernel::ReSize() { return ConcatBaseCPUKernel::ReSize(); }
|
|||
|
||||
int ConcatCPUKernel::DoConcat(int task_id) {
|
||||
auto input_num = in_tensors_.size();
|
||||
std::vector<void *> inputs_addr(input_num, nullptr);
|
||||
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
|
||||
std::vector<const void *> inputs_addr(input_num, nullptr);
|
||||
std::vector<const int *> inputs_output_shape(input_num + 1, nullptr);
|
||||
|
||||
std::vector<std::vector<int>> shapes;
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
|
@ -59,8 +59,8 @@ int ConcatCPUKernel::DoConcat(int task_id) {
|
|||
inputs_output_shape[input_num] = output_shape.data();
|
||||
auto output_addr = out_tensors_.at(0)->MutableData();
|
||||
|
||||
Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(),
|
||||
output_shape.size(), output_addr, task_id, thread_count_);
|
||||
Concat(inputs_addr.data(), input_num, axis_, inputs_output_shape.data(), output_shape.size(), output_addr, task_id,
|
||||
thread_count_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue