fix cortex-m build
This commit is contained in:
parent
5b6806ae36
commit
6de593f411
|
@ -369,8 +369,9 @@ else()
|
|||
install(FILES ${TOP_DIR}/mindspore/lite/build/.commit_id DESTINATION ${RUNTIME_PKG_NAME}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
install(DIRECTORY ${flatbuffers_INC}/ DESTINATION ${RUNTIME_INC_DIR}/third_party COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
|
||||
if(NOT PLATFORM_MCU)
|
||||
install(DIRECTORY ${flatbuffers_INC}/ DESTINATION ${RUNTIME_INC_DIR}/third_party COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
if(PLATFORM_ARM64)
|
||||
if(SUPPORT_NPU)
|
||||
install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${RUNTIME_DIR}/third_party/hiai_ddk/lib
|
||||
|
@ -797,6 +798,11 @@ elseif(WIN32)
|
|||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
install(FILES ${LIB_LIST} DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
elseif(PLATFORM_MCU)
|
||||
__install_micro_wrapper()
|
||||
__install_micro_codegen()
|
||||
install(DIRECTORY ${TOP_DIR}/include/c_api/ DESTINATION ${RUNTIME_INC_DIR}/c_api
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
else()
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/include/kernel_interface.h DESTINATION ${RUNTIME_INC_DIR}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
|
|
|
@ -126,9 +126,7 @@ COPY_TASK_IMPL(float16_t, int64_t)
|
|||
#endif
|
||||
|
||||
#define GATHER_D_IMPL(type0, type1) \
|
||||
int GatherD_Input_##type0##_Index_##type1( \
|
||||
type0 *output, const type0 *input, type1 *index, const size_t *input_shape, const size_t input_shape_size, \
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim) { \
|
||||
GATHER_D_IMPL_DECLARATION(type0, type1) { \
|
||||
if (output == NULL || input == NULL || index == NULL || input_shape == NULL || output_shape == NULL) { \
|
||||
return NNACL_NULL_PTR; \
|
||||
} \
|
||||
|
|
|
@ -27,44 +27,27 @@ extern "C" {
|
|||
dim) \
|
||||
GatherD_Input_##type0##_Index_##type1(output, input, index, input_shape, input_shape_size, output_shape, \
|
||||
output_shape_size, dim)
|
||||
int GatherD_Input_bool_Index_int32_t(bool *output, const bool *input, int32_t *index, const size_t *input_shape,
|
||||
const size_t input_shape_size, const size_t *output_shape,
|
||||
const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_bool_Index_int64_t(bool *output, const bool *input, int64_t *index, const size_t *input_shape,
|
||||
const size_t input_shape_size, const size_t *output_shape,
|
||||
const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_int16_t_Index_int32_t(int16_t *output, const int16_t *input, int32_t *index,
|
||||
const size_t *input_shape, const size_t input_shape_size,
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_int16_t_Index_int64_t(int16_t *output, const int16_t *input, int64_t *index,
|
||||
const size_t *input_shape, const size_t input_shape_size,
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_int32_t_Index_int32_t(int32_t *output, const int32_t *input, int *index, const size_t *input_shape,
|
||||
const size_t input_shape_size, const size_t *output_shape,
|
||||
const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_int32_t_Index_int64_t(int32_t *output, const int32_t *input, int64_t *index,
|
||||
const size_t *input_shape, const size_t input_shape_size,
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_int64_t_Index_int32_t(int64_t *output, const int64_t *input, int *index, const size_t *input_shape,
|
||||
const size_t input_shape_size, const size_t *output_shape,
|
||||
const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_int64_t_Index_int64_t(int64_t *output, const int64_t *input, int64_t *index,
|
||||
const size_t *input_shape, const size_t input_shape_size,
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_float_Index_int32_t(float *output, const float *input, int *index, const size_t *input_shape,
|
||||
const size_t input_shape_size, const size_t *output_shape,
|
||||
const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_float_Index_int64_t(float *output, const float *input, int64_t *index, const size_t *input_shape,
|
||||
const size_t input_shape_size, const size_t *output_shape,
|
||||
const size_t output_shape_size, const int dim);
|
||||
|
||||
#define GATHER_D_IMPL_DECLARATION(type0, type1) \
|
||||
int GatherD_Input_##type0##_Index_##type1(type0 *output, const type0 *input, type1 *index, \
|
||||
const size_t *input_shape, const size_t input_shape_size, \
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim)
|
||||
|
||||
GATHER_D_IMPL_DECLARATION(bool, int32_t);
|
||||
GATHER_D_IMPL_DECLARATION(bool, int64_t);
|
||||
GATHER_D_IMPL_DECLARATION(int16_t, int32_t);
|
||||
GATHER_D_IMPL_DECLARATION(int16_t, int64_t);
|
||||
GATHER_D_IMPL_DECLARATION(int32_t, int32_t);
|
||||
GATHER_D_IMPL_DECLARATION(int32_t, int64_t);
|
||||
GATHER_D_IMPL_DECLARATION(int64_t, int32_t);
|
||||
GATHER_D_IMPL_DECLARATION(int64_t, int64_t);
|
||||
GATHER_D_IMPL_DECLARATION(float, int32_t);
|
||||
GATHER_D_IMPL_DECLARATION(float, int64_t);
|
||||
#ifdef ENABLE_FP16
|
||||
int GatherD_Input_float16_t_Index_int32_t(float16_t *output, const float16_t *input, int *index,
|
||||
const size_t *input_shape, const size_t input_shape_size,
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim);
|
||||
int GatherD_Input_float16_t_Index_int64_t(float16_t *output, const float16_t *input, int64_t *index,
|
||||
const size_t *input_shape, const size_t input_shape_size,
|
||||
const size_t *output_shape, const size_t output_shape_size, const int dim);
|
||||
GATHER_D_IMPL_DECLARATION(float16_t, int32_t);
|
||||
GATHER_D_IMPL_DECLARATION(float16_t, int64_t);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -94,7 +94,7 @@ void DoSlice(const void *input, void *output, const SliceParameter *param, int t
|
|||
}
|
||||
}
|
||||
|
||||
static bool WhetherCopyByAxis(const int begin[], const int end[], const int shape[], int dim) {
|
||||
static bool WhetherCopyByAxis(const int32_t *begin, const int32_t *end, const int32_t *shape, int dim) {
|
||||
for (int i = dim + 1; i < DIMENSION_8D; ++i) {
|
||||
if (begin[i] != 0 || end[i] != shape[i]) return false;
|
||||
}
|
||||
|
|
|
@ -52,7 +52,7 @@ int SetOutputShapeFromParam(const TensorC *const *inputs, TensorC **outputs, con
|
|||
if (mul_block_shape == 0) {
|
||||
return NNACL_ERR;
|
||||
}
|
||||
int32_t output_shape[MAX_SHAPE_SIZE];
|
||||
int output_shape[MAX_SHAPE_SIZE];
|
||||
size_t output_shape_size = input_shape_size;
|
||||
output_shape[kNHWC_N] = input_shape[kNHWC_N] / mul_block_shape;
|
||||
output_shape[kNHWC_H] = input_shape[kNHWC_H] * block_shape[0] - crops[0] - crops[1];
|
||||
|
@ -100,7 +100,7 @@ int SetOutputShapeFromInput(const TensorC *const *inputs, TensorC **outputs) {
|
|||
if (mul_block_shape_ == 0) {
|
||||
return NNACL_ERR;
|
||||
}
|
||||
int32_t output_shape[MAX_SHAPE_SIZE];
|
||||
int output_shape[MAX_SHAPE_SIZE];
|
||||
size_t output_shape_size = input_shape_size;
|
||||
output_shape[kNHWC_N] = input_shape[kNHWC_N] / mul_block_shape_;
|
||||
output_shape[kNHWC_H] = input_shape[kNHWC_H] * block_shape[0] - crops[0] - crops[1];
|
||||
|
|
|
@ -44,7 +44,7 @@ int DepthToSpaceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
|
|||
if (block_size == 0 || input_shape[kNHWC_C] % (block_size * block_size) != 0 || input_shape[kNHWC_C] == 0) {
|
||||
return NNACL_PARAM_INVALID;
|
||||
}
|
||||
int32_t output_shape[MAX_SHAPE_SIZE];
|
||||
int output_shape[MAX_SHAPE_SIZE];
|
||||
size_t output_shape_size = input_shape_size;
|
||||
output_shape[kNHWC_N] = input_shape[kNHWC_N];
|
||||
output_shape[kNHWC_H] = input_shape[kNHWC_H] * block_size;
|
||||
|
|
|
@ -80,7 +80,7 @@ int SpaceSetOutputShapeFromInput(const TensorC *const *inputs, size_t inputs_siz
|
|||
padding_right = padding[3];
|
||||
block_w = block_shape[1];
|
||||
}
|
||||
int32_t output_shape[MAX_SHAPE_SIZE];
|
||||
int output_shape[MAX_SHAPE_SIZE];
|
||||
size_t output_shape_size = input->shape_size_;
|
||||
if (input->shape_[kNHWC_N] == 0 || block_shape[0] * block_w > INT_MAX / input->shape_[kNHWC_N]) {
|
||||
return NNACL_ERR;
|
||||
|
|
|
@ -38,7 +38,7 @@ int StackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
|
|||
if (input->shape_size_ > MAX_SHAPE_SIZE) {
|
||||
return NNACL_INPUT_TENSOR_ERROR;
|
||||
}
|
||||
int32_t output_shape[MAX_SHAPE_SIZE] = {0};
|
||||
int output_shape[MAX_SHAPE_SIZE] = {0};
|
||||
size_t output_shape_size = 0;
|
||||
ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_);
|
||||
int axis = param->axis_ < 0 ? (int)(param->axis_) + (int)(input->shape_size_) + 1 : param->axis_;
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
#include "nnacl/int8/arg_min_max_int8.h"
|
||||
#include <float.h>
|
||||
|
||||
void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
|
||||
int *after_axis_count) {
|
||||
void CalcParameter(const int32_t *shape, int dims_number, int axis, int32_t *pre_axis_count, int32_t *axis_count,
|
||||
int32_t *after_axis_count) {
|
||||
*pre_axis_count = 1;
|
||||
for (int i = 0; i < axis; ++i) {
|
||||
*pre_axis_count = (*pre_axis_count) * shape[i];
|
||||
|
@ -82,7 +82,7 @@ void DoArgMinMaxQuant(const int8_t *input, int8_t *output1, int8_t *output2, con
|
|||
}
|
||||
}
|
||||
|
||||
void Int8ArgMinMaxQuant(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxQuant(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
const ArgMinMaxParameter *param, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
int pre_axis_count = 1;
|
||||
int axis_count = 1;
|
||||
|
@ -105,7 +105,7 @@ int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_
|
|||
return real_out * output_inverse_scale + output_zp;
|
||||
}
|
||||
|
||||
void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
bool out_value = param->out_value_;
|
||||
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||
|
@ -131,7 +131,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output1, int8_t *output2, co
|
|||
}
|
||||
}
|
||||
|
||||
void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
bool out_value = param->out_value_;
|
||||
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||
|
@ -162,7 +162,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output1, int8_t *output2, co
|
|||
}
|
||||
}
|
||||
|
||||
void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
bool out_value = param->out_value_;
|
||||
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||
|
@ -197,7 +197,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output1, int8_t *output2, co
|
|||
}
|
||||
}
|
||||
|
||||
void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
bool out_value = param->out_value_;
|
||||
const float output_inverse_scale = 1.f / out_quant_arg->scale_;
|
||||
|
|
|
@ -23,15 +23,15 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
void Int8ArgMinMaxQuant(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxQuant(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
const ArgMinMaxParameter *param, const QuantArg *in_quant, const QuantArg *out_quant);
|
||||
void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant, const QuantArg *out_quant);
|
||||
void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant, const QuantArg *out_quant);
|
||||
void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant, const QuantArg *out_quant);
|
||||
void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output1, int8_t *output2, const int *in_shape,
|
||||
void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output1, int8_t *output2, const int32_t *in_shape,
|
||||
ArgMinMaxParameter *param, const QuantArg *in_quant, const QuantArg *out_quant);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
#endif
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int *inShape,
|
||||
const int *inStrides, const int *outStrides, const int *multiple) {
|
||||
void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int32_t *inShape,
|
||||
const int32_t *inStrides, const int32_t *outStrides, const int32_t *multiple) {
|
||||
int srcDimSize = inShape[dim];
|
||||
if (dim == ndim - 1) {
|
||||
for (int i = 0; i < multiple[dim]; i++) {
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int *inShape,
|
||||
const int *inStrides, const int *outStrides, const int *multiple);
|
||||
void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int32_t *inShape,
|
||||
const int32_t *inStrides, const int32_t *outStrides, const int32_t *multiple);
|
||||
void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
|
||||
ArithmeticParameter *param);
|
||||
|
||||
|
|
|
@ -216,7 +216,7 @@ int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec,
|
|||
}
|
||||
|
||||
void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para,
|
||||
int *index) {
|
||||
int32_t *index) {
|
||||
int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_);
|
||||
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)para.shift_left_);
|
||||
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
#include "nnacl/int8/batch_to_space_int8.h"
|
||||
|
||||
void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, int out_n,
|
||||
const int *block, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const int32_t *in_shape, int out_n,
|
||||
const int32_t *block, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
int block_h = block[0];
|
||||
int block_w = block[1];
|
||||
int in_h = in_shape[1];
|
||||
|
@ -53,8 +53,9 @@ void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const in
|
|||
}
|
||||
}
|
||||
|
||||
void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
|
||||
const int *crops, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg) {
|
||||
void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int32_t *in_shape, int out_n,
|
||||
const int32_t *block, const int32_t *crops, const QuantArg *in_quant_arg,
|
||||
const QuantArg *out_quant_arg) {
|
||||
int block_h = block[0];
|
||||
int block_w = block[1];
|
||||
int in_h = in_shape[1];
|
||||
|
|
|
@ -21,10 +21,11 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, int out_n,
|
||||
const int *block, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg);
|
||||
void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
|
||||
const int *crops, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg);
|
||||
void BatchToSpaceNoCropForNHWCInt8(const int8_t *input, int8_t *output, const int32_t *in_shape, int out_n,
|
||||
const int32_t *block, const QuantArg *in_quant_arg, const QuantArg *out_quant_arg);
|
||||
void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int32_t *in_shape, int out_n,
|
||||
const int32_t *block, const int32_t *crops, const QuantArg *in_quant_arg,
|
||||
const QuantArg *out_quant_arg);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -25,7 +25,7 @@ void Int8Concat(int8_t **inputs, int8_t *output, const ConcatParameter *para, in
|
|||
int input_num = para->input_num_;
|
||||
int64_t count_unit_ = para->count_unit_;
|
||||
int64_t after_axis_size = para->after_axis_size;
|
||||
const int *output_shape = para->output_shapes_;
|
||||
const int32_t *output_shape = para->output_shapes_;
|
||||
int out_copy_size = output_shape[axis] * after_axis_size;
|
||||
QuantArg *input_quant = para->quant_arg_.in_args_;
|
||||
int output_zp = para->quant_arg_.out_args_.zp_;
|
||||
|
@ -37,7 +37,7 @@ void Int8Concat(int8_t **inputs, int8_t *output, const ConcatParameter *para, in
|
|||
|
||||
for (int k = start; k < end; k++) {
|
||||
for (int i = 0; i < input_num; i++) {
|
||||
const int *input_shape = para->input_shapes_[i];
|
||||
const int32_t *input_shape = para->input_shapes_[i];
|
||||
int64_t in_copy_size = input_shape[axis] * after_axis_size;
|
||||
const int8_t *input_ptr = inputs[i] + k * in_copy_size;
|
||||
if (input_quant[i].scale_ == output_scale && input_quant[i].zp_ == output_zp) {
|
||||
|
|
|
@ -84,9 +84,9 @@ void ConvDwInt8(int8_t *output_data, int32_t *row_buffer, const int8_t *input_da
|
|||
int end_h = MSMIN(start_h + step_h, conv_param->output_h_);
|
||||
|
||||
bool filter_per_channel = conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
|
||||
int *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_;
|
||||
int *left_shift = conv_param->conv_quant_arg_.left_shift_;
|
||||
int *right_shift = conv_param->conv_quant_arg_.right_shift_;
|
||||
int32_t *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_;
|
||||
int32_t *left_shift = conv_param->conv_quant_arg_.left_shift_;
|
||||
int32_t *right_shift = conv_param->conv_quant_arg_.right_shift_;
|
||||
|
||||
int intput_zp = conv_param->conv_quant_arg_.input_quant_args_[0].zp_;
|
||||
int output_zp = conv_param->conv_quant_arg_.output_quant_args_[0].zp_;
|
||||
|
@ -240,9 +240,9 @@ void ConvDw3x3Int8Row(int8_t *output, int8_t *buffer, const int8_t *input, const
|
|||
const ConvParameter *conv_param, int start_w, int end_w, int block_output_h, int block_output_w,
|
||||
int block_input_h, int block_input_w) {
|
||||
bool filter_per_channel = conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
|
||||
int *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_;
|
||||
int *left_shift = conv_param->conv_quant_arg_.left_shift_;
|
||||
int *right_shift = conv_param->conv_quant_arg_.right_shift_;
|
||||
int32_t *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_;
|
||||
int32_t *left_shift = conv_param->conv_quant_arg_.left_shift_;
|
||||
int32_t *right_shift = conv_param->conv_quant_arg_.right_shift_;
|
||||
int in_zp = conv_param->conv_quant_arg_.input_quant_args_[0].zp_;
|
||||
int out_zp = conv_param->conv_quant_arg_.output_quant_args_[0].zp_;
|
||||
int acc_min = conv_param->conv_quant_arg_.out_act_min_[0];
|
||||
|
@ -331,7 +331,7 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data
|
|||
#ifndef ENABLE_ARM32
|
||||
void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||
int width, int in_kh_step, int in_kw_step, int channel, int8_t in_zp, int32_t out_zp,
|
||||
const int *out_multiplier, const int *left_shift, const int *right_shift,
|
||||
const int32_t *out_multiplier, const int32_t *left_shift, const int32_t *right_shift,
|
||||
const int32_t acc_min, const int32_t acc_max, bool per_channel) {
|
||||
for (int c = 0; c < channel; c += 8) {
|
||||
int tmp_buffer[8];
|
||||
|
@ -386,24 +386,24 @@ void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *wei
|
|||
|
||||
#ifndef ENABLE_ARM64
|
||||
void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
|
||||
const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
|
||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int32_t *out_multiplier,
|
||||
const int32_t *left_shift, const int32_t *right_shift, int32_t acc_min, int32_t acc_max,
|
||||
bool per_channel) {
|
||||
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
||||
left_shift, right_shift, acc_min, acc_max, per_channel);
|
||||
}
|
||||
|
||||
void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
|
||||
const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
|
||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int32_t *out_multiplier,
|
||||
const int32_t *left_shift, const int32_t *right_shift, int32_t acc_min, int32_t acc_max,
|
||||
bool per_channel) {
|
||||
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 2, 3, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
||||
left_shift, right_shift, acc_min, acc_max, per_channel);
|
||||
}
|
||||
|
||||
void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int in_kh_step,
|
||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int *out_multiplier,
|
||||
const int *left_shift, const int *right_shift, int32_t acc_min, int32_t acc_max,
|
||||
int in_kw_step, int channel, int8_t in_zp, int32_t out_zp, const int32_t *out_multiplier,
|
||||
const int32_t *left_shift, const int32_t *right_shift, int32_t acc_min, int32_t acc_max,
|
||||
bool per_channel) {
|
||||
ConvDw3x3Int8BorderPixel(dst, src, weight, bias, 3, 2, in_kh_step, in_kw_step, channel, in_zp, out_zp, out_multiplier,
|
||||
left_shift, right_shift, acc_min, acc_max, per_channel);
|
||||
|
@ -413,9 +413,9 @@ void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weig
|
|||
void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data,
|
||||
const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding) {
|
||||
bool filter_per_channel = conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
|
||||
int *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_;
|
||||
int *left_shift = conv_param->conv_quant_arg_.left_shift_;
|
||||
int *right_shift = conv_param->conv_quant_arg_.right_shift_;
|
||||
int32_t *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_;
|
||||
int32_t *left_shift = conv_param->conv_quant_arg_.left_shift_;
|
||||
int32_t *right_shift = conv_param->conv_quant_arg_.right_shift_;
|
||||
int in_zp = conv_param->conv_quant_arg_.input_quant_args_[0].zp_;
|
||||
int out_zp = conv_param->conv_quant_arg_.output_quant_args_[0].zp_;
|
||||
int acc_min = conv_param->conv_quant_arg_.out_act_min_[0];
|
||||
|
@ -499,8 +499,8 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16
|
|||
/*conv depthwise sliding window perchannel int8 begin*/
|
||||
void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
|
||||
int width, int in_kh_step, int in_kw_step, int kernel_w, const int8_t *input_zp,
|
||||
const int32_t *out_zp, const int *out_multiplier, const int *left_shift,
|
||||
const int *right_shift, const int32_t *acc_min, const int32_t *acc_max) {
|
||||
const int32_t *out_zp, const int32_t *out_multiplier, const int32_t *left_shift,
|
||||
const int32_t *right_shift, const int32_t *acc_min, const int32_t *acc_max) {
|
||||
int tmp_buffer[C8NUM];
|
||||
for (int i = 0; i < C8NUM; i++) {
|
||||
tmp_buffer[i] = 0;
|
||||
|
@ -535,8 +535,9 @@ void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight
|
|||
|
||||
void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom,
|
||||
int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
|
||||
const int8_t *in_zp, const int32_t *out_zp, const int *out_multiplier, const int *left_shift,
|
||||
const int *right_shift, const int32_t *acc_min, const int32_t *acc_max) {
|
||||
const int8_t *in_zp, const int32_t *out_zp, const int32_t *out_multiplier,
|
||||
const int32_t *left_shift, const int32_t *right_shift, const int32_t *acc_min,
|
||||
const int32_t *acc_max) {
|
||||
int8_t *dst_h = dst + top * sliding->out_h_step_;
|
||||
for (int oh = top; oh < bottom; oh++) {
|
||||
int ih = oh * conv_param->stride_h_ - conv_param->pad_u_;
|
||||
|
@ -630,11 +631,11 @@ void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *
|
|||
const int16_t *weight = weight_data + oc * sliding->kernel_step_;
|
||||
const int32_t *bias = bias_data + oc * C8NUM;
|
||||
|
||||
int *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_ + oc * C8NUM;
|
||||
int *left_shift = conv_param->conv_quant_arg_.left_shift_ + oc * C8NUM;
|
||||
int *right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C8NUM;
|
||||
int *acc_min = conv_param->conv_quant_arg_.out_act_min_ + oc * C8NUM;
|
||||
int *acc_max = conv_param->conv_quant_arg_.out_act_max_ + oc * C8NUM;
|
||||
int32_t *out_multiplier = conv_param->conv_quant_arg_.quant_multiplier_ + oc * C8NUM;
|
||||
int32_t *left_shift = conv_param->conv_quant_arg_.left_shift_ + oc * C8NUM;
|
||||
int32_t *right_shift = conv_param->conv_quant_arg_.right_shift_ + oc * C8NUM;
|
||||
int32_t *acc_min = conv_param->conv_quant_arg_.out_act_min_ + oc * C8NUM;
|
||||
int32_t *acc_max = conv_param->conv_quant_arg_.out_act_max_ + oc * C8NUM;
|
||||
const int8_t *in_zp = input_zp + oc * C8NUM;
|
||||
const int32_t *out_zp = output_zp + oc * C8NUM;
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include "nnacl/int8/depth_to_space_int8.h"
|
||||
#include <string.h>
|
||||
|
||||
void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, DepthToSpaceParameter *param,
|
||||
void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int32_t *in_shape, DepthToSpaceParameter *param,
|
||||
QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
|
||||
int32_t block_size = param->block_size_;
|
||||
int32_t in_shape_dim2 = in_shape[2];
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_shape, DepthToSpaceParameter *param,
|
||||
void DepthToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int32_t *in_shape, DepthToSpaceParameter *param,
|
||||
QuantArg *in_quant_arg, QuantArg *out_quant_arg);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include "nnacl/int8/dynamic_gather_int8.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
|
||||
int indices_element_size, float *output, const float *scale_in, const int *zp_in) {
|
||||
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int32_t *indices,
|
||||
int indices_element_size, float *output, const float *scale_in, const int32_t *zp_in) {
|
||||
for (int m = 0; m < outer_size; ++m) {
|
||||
const int8_t *int8_in_m = input + inner_size * m * limit;
|
||||
float *int8_out_m = output + inner_size * m * indices_element_size;
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int *indices,
|
||||
int indices_element_size, float *output, const float *scale_in, const int *zp_in);
|
||||
void DynamicGather(const int8_t *input, int outer_size, int inner_size, int limit, const int32_t *indices,
|
||||
int indices_element_size, float *output, const float *scale_in, const int32_t *zp_in);
|
||||
void DynamicGatherArm64(const int8_t *src, float *output, int count_16, int zp, float scale);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
#include "nnacl/int8/fixed_point.h"
|
||||
|
||||
void DynamicMatmul4x4x16AIWI(const int8_t *a, const int8_t *b, float *out, size_t deep4, float *multi_scales,
|
||||
float *bias, size_t row, size_t col, size_t stride, const int *a_sums, const int *b_sums,
|
||||
int64_t a_zp, int64_t b_zp_sum) {
|
||||
float *bias, size_t row, size_t col, size_t stride, const int32_t *a_sums,
|
||||
const int32_t *b_sums, int64_t a_zp, int64_t b_zp_sum) {
|
||||
/* *
|
||||
* row4x4-major * row4x16-major => (int8)row-major
|
||||
* support activation per-layer symmetric && weight per-layer/per-channel symmetric
|
||||
|
@ -321,7 +321,7 @@ void PackInput2Col4x4(const int8_t *src_input, int8_t *packed_input, int row, in
|
|||
}
|
||||
}
|
||||
|
||||
void CalcWeightSums(const int8_t *weight, int row, int col, int *dst, DataOrder order) {
|
||||
void CalcWeightSums(const int8_t *weight, int row, int col, int32_t *dst, DataOrder order) {
|
||||
if (order == RowMajor) {
|
||||
for (int c = 0; c < col; ++c) {
|
||||
int sum = 0;
|
||||
|
@ -342,7 +342,7 @@ void CalcWeightSums(const int8_t *weight, int row, int col, int *dst, DataOrder
|
|||
return;
|
||||
}
|
||||
|
||||
void CalcPartWeightSums(const int8_t *weight, int row, int stride, int cur_col, int *dst, DataOrder order) {
|
||||
void CalcPartWeightSums(const int8_t *weight, int row, int stride, int cur_col, int32_t *dst, DataOrder order) {
|
||||
if (order == RowMajor) {
|
||||
for (int c = 0; c < cur_col; ++c) {
|
||||
int sum = 0;
|
||||
|
|
|
@ -29,16 +29,16 @@ void PackInput4x4(const int8_t *src_input, int8_t *packed_input, size_t input_ch
|
|||
void DynamicMatmul4x16x4AIWI(const int8_t *a, const int8_t *b, const float *bias, float *dst, int row, int col,
|
||||
int deep, int deep16, size_t stride, int input_zp, float input_scale,
|
||||
const float *filter_scale, const int filter_zp, bool filter_per_channel);
|
||||
void CalcWeightSums(const int8_t *weight, int row, int col, int *dst, DataOrder order);
|
||||
void CalcPartWeightSums(const int8_t *weight, int row, int stride, int cur_col, int *dst, DataOrder order);
|
||||
void CalcWeightSums(const int8_t *weight, int row, int col, int32_t *dst, DataOrder order);
|
||||
void CalcPartWeightSums(const int8_t *weight, int row, int stride, int cur_col, int32_t *dst, DataOrder order);
|
||||
#ifdef ENABLE_ARM64
|
||||
void DynamicMatmulSdot4x4x16AIWI(const int8_t *a, const int8_t *b, float *out, size_t deep4, float *multi_scales,
|
||||
float *bias, size_t row, size_t col, size_t stride, const int *a_sums,
|
||||
const int *b_sums, int64_t a_zp, int64_t b_zp_sum);
|
||||
float *bias, size_t row, size_t col, size_t stride, const int32_t *a_sums,
|
||||
const int32_t *b_sums, int64_t a_zp, int64_t b_zp_sum);
|
||||
#endif
|
||||
void DynamicMatmul4x4x16AIWI(const int8_t *a, const int8_t *b, float *out, size_t deep4, float *multi_scales,
|
||||
float *bias, size_t row, size_t col, size_t stride, const int *a_sums, const int *b_sums,
|
||||
int64_t a_zp, int64_t b_zp_sum);
|
||||
float *bias, size_t row, size_t col, size_t stride, const int32_t *a_sums,
|
||||
const int32_t *b_sums, int64_t a_zp, int64_t b_zp_sum);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -166,7 +166,7 @@ int32_t reciprocal_on_interval_between_0_1(int32_t a) {
|
|||
return Rescale(x, 2 - 1, 0);
|
||||
}
|
||||
|
||||
int32_t ComputerReciprocal(int32_t x, uint32_t x_digits, int *recip_shift) {
|
||||
int32_t ComputerReciprocal(int32_t x, uint32_t x_digits, int32_t *recip_shift) {
|
||||
uint32_t leading_zreos_plus_one = CountLeadingZeroBits((uint32_t)x);
|
||||
*recip_shift = x_digits - leading_zreos_plus_one;
|
||||
const int32_t shifted_minus_one = (int32_t)(((uint32_t)x << leading_zreos_plus_one) - ((uint32_t)(1) << 31));
|
||||
|
@ -190,7 +190,7 @@ int exp_on_interval_values(int a) {
|
|||
}
|
||||
|
||||
void exp_barrel_shifter(int exponent, int muliplier, int integer_bits, int fractional_bits, int remainder,
|
||||
int *result) {
|
||||
int32_t *result) {
|
||||
if (integer_bits > exponent) {
|
||||
int total_shift = integer_bits > exponent ? fractional_bits + exponent : 0;
|
||||
*result = BitsSelect(MaskIfNonZero(BitAnd(remainder, (1 << (uint32_t)total_shift))),
|
||||
|
|
|
@ -55,7 +55,7 @@ int32_t Rescale(int x, int kIntegerBitsSrc, int kIntegerBitsDst);
|
|||
|
||||
uint32_t CountLeadingSignBits(int32_t x);
|
||||
|
||||
int32_t ComputerReciprocal(int32_t x, uint32_t x_digits, int *recip_shift);
|
||||
int32_t ComputerReciprocal(int32_t x, uint32_t x_digits, int32_t *recip_shift);
|
||||
|
||||
int exp_on_negative_values(int a, const int tIntegerBits);
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include <string.h>
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int GatherNdInt8(int8_t *input, int8_t *output, const int *in_offset, int area, int count, GatherQuantArg param) {
|
||||
int GatherNdInt8(int8_t *input, int8_t *output, const int32_t *in_offset, int area, int count, GatherQuantArg param) {
|
||||
double alpha = param.alpha_;
|
||||
int z1 = param.zp_in_;
|
||||
int z2 = param.zp_out_;
|
||||
|
|
|
@ -23,7 +23,8 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int GatherNdInt8(int8_t *in_data, int8_t *out_data, const int *in_offset, int area, int count, GatherQuantArg param);
|
||||
int GatherNdInt8(int8_t *in_data, int8_t *out_data, const int32_t *in_offset, int area, int count,
|
||||
GatherQuantArg param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "nnacl/errorcode.h"
|
||||
|
||||
int GatherInt8Int32Index(const int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit,
|
||||
const int *indices, int indices_element_size, GatherQuantArg para) {
|
||||
const int32_t *indices, int indices_element_size, GatherQuantArg para) {
|
||||
double alpha = para.alpha_;
|
||||
int z1 = para.zp_in_;
|
||||
int z2 = para.zp_out_;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
int GatherInt8Int32Index(const int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit,
|
||||
const int *indices, int indices_element_size, GatherQuantArg para);
|
||||
const int32_t *indices, int indices_element_size, GatherQuantArg para);
|
||||
|
||||
int GatherInt8Int64Index(const int8_t *in_data, int8_t *out_data, int outer_size, int inner_size, int limit,
|
||||
const int64_t *indices, int indices_element_size, GatherQuantArg para);
|
||||
|
|
|
@ -235,8 +235,8 @@ void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
|
|||
return;
|
||||
}
|
||||
|
||||
void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int *dst, int row_4, int col_4, int deep_16,
|
||||
const int *input_sum, const int *bias) {
|
||||
void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int32_t *dst, int row_4, int col_4, int deep_16,
|
||||
const int32_t *input_sum, const int32_t *bias) {
|
||||
/* row4x16-major * row16x4-major => row4x4-major */
|
||||
for (int r = 0; r < row_4; r++) {
|
||||
for (int c = 0; c < col_4; c++) {
|
||||
|
@ -799,8 +799,8 @@ void CalcInputSums(const int8_t *input, int row, int col, int weight_zp, int32_t
|
|||
}
|
||||
|
||||
// dst: bias + depth*input_zp*weight_zp - input_zp*weight_col_sums
|
||||
void CalcWeightBiasSums(const int8_t *weight, int row, int col, int input_zp, const int *weight_zp_ptr, const int *bias,
|
||||
int32_t *dst, DataOrder order, bool filter_per_channel) {
|
||||
void CalcWeightBiasSums(const int8_t *weight, int row, int col, int input_zp, const int32_t *weight_zp_ptr,
|
||||
const int32_t *bias, int32_t *dst, DataOrder order, bool filter_per_channel) {
|
||||
for (int c = 0; c < col; ++c) {
|
||||
int sum = 0;
|
||||
for (int r = 0; r < row; ++r) {
|
||||
|
@ -819,7 +819,7 @@ void CalcWeightBiasSums(const int8_t *weight, int row, int col, int input_zp, co
|
|||
}
|
||||
|
||||
void CalcPartWeightBiasSums(const int8_t *weight, int row, int stride, int cur_col, int input_zp,
|
||||
const int *weight_zp_ptr, const int *bias, int *dst, DataOrder order,
|
||||
const int32_t *weight_zp_ptr, const int32_t *bias, int32_t *dst, DataOrder order,
|
||||
bool filter_per_channel) {
|
||||
for (int c = 0; c < cur_col; ++c) {
|
||||
int sum = 0;
|
||||
|
|
|
@ -27,8 +27,8 @@ extern "C" {
|
|||
/* 4x16 16x4 -> 4x4 */
|
||||
/* sdot 4x4 4x16 -> 4x16 */
|
||||
/* matmul */
|
||||
void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int *dst, int row_4, int col_4, int deep_16,
|
||||
const int *input_sum, const int *bias);
|
||||
void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int32_t *dst, int row_4, int col_4, int deep_16,
|
||||
const int32_t *input_sum, const int32_t *bias);
|
||||
void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
|
||||
void RowMajor2Col16x4MajorInt8(const int8_t *src, int8_t *dst, int row, int col);
|
||||
void RowMajor2Col4x16MajorInt8(const int8_t *src, int8_t *dst, int row, int col);
|
||||
|
@ -36,10 +36,10 @@ void RowMajor2Col4x16MajorPartInt8(const int8_t *src, int8_t *dst, int row, int
|
|||
void PackInput2Col4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, int row,
|
||||
int col, int row_stride, int32_t filter_zp);
|
||||
void CalcInputSums(const int8_t *input, int row, int col, int weight_zp, int32_t *dst, DataOrder order);
|
||||
void CalcWeightBiasSums(const int8_t *weight, int row, int col, int input_zp, const int *weight_zp_ptr, const int *bias,
|
||||
int32_t *dst, DataOrder order, bool filter_per_channel);
|
||||
void CalcWeightBiasSums(const int8_t *weight, int row, int col, int input_zp, const int32_t *weight_zp_ptr,
|
||||
const int32_t *bias, int32_t *dst, DataOrder order, bool filter_per_channel);
|
||||
void CalcPartWeightBiasSums(const int8_t *weight, int row, int stride, int cur_col, int input_zp,
|
||||
const int *weight_zp_ptr, const int *bias, int *dst, DataOrder order,
|
||||
const int32_t *weight_zp_ptr, const int32_t *bias, int32_t *dst, DataOrder order,
|
||||
bool filter_per_channel);
|
||||
void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16, const int32_t *a_sums,
|
||||
const int32_t *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier,
|
||||
|
@ -73,17 +73,18 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row
|
|||
int32_t maxi, size_t per_channel, const int32_t *filter_zp);
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums,
|
||||
const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift,
|
||||
int32_t *right_shift, int row, int col, int stride, int filter_peroc);
|
||||
void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16,
|
||||
const int32_t *a_sums, const int32_t *bias, int act_min, int act_max, int out_zp,
|
||||
int32_t *multiplier, int32_t *left_shift, int32_t *right_shift, int row, int col, int stride,
|
||||
int filter_peroc);
|
||||
|
||||
void MatMulR4Int8Neon64(const int8_t *a, const int8_t *b, int32_t *dst, int row4, int col4, int deep16,
|
||||
const int *input_sum, const int *bias);
|
||||
const int32_t *input_sum, const int32_t *bias);
|
||||
#endif
|
||||
#ifdef ENABLE_ARM32
|
||||
void MatmulInt8Neon32(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16,
|
||||
const int *input_sums, const int *weight_bias, int act_min, int act_max, int out_zp,
|
||||
int *multiplier, int *left_shift, int *right_shift, int stride, int per_channel);
|
||||
const int32_t *input_sums, const int32_t *weight_bias, int act_min, int act_max, int out_zp,
|
||||
int32_t *multiplier, int32_t *left_shift, int32_t *right_shift, int stride, int per_channel);
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i
|
|||
}
|
||||
|
||||
void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
|
||||
const MulQuantArg *quant_arg, int *index) {
|
||||
const MulQuantArg *quant_arg, int32_t *index) {
|
||||
int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
|
||||
int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
|
||||
int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
|
||||
|
|
|
@ -52,7 +52,7 @@ int TransOut2InputDimIndexInt8(int out_dim_index, int left_pad, int in_dim, int
|
|||
return MSMAX(index_sum - out_dim_index, 0);
|
||||
}
|
||||
|
||||
int GetInputFlattenIndexInt8(int out_flatten_index, const int *input_shape, const PadParameter *pad_param) {
|
||||
int GetInputFlattenIndexInt8(int out_flatten_index, const int32_t *input_shape, const PadParameter *pad_param) {
|
||||
int in_flatten_index = 0;
|
||||
int i;
|
||||
for (i = 0; i < COMM_SHAPE_SIZE; ++i) {
|
||||
|
@ -66,8 +66,8 @@ int GetInputFlattenIndexInt8(int out_flatten_index, const int *input_shape, cons
|
|||
return in_flatten_index;
|
||||
}
|
||||
|
||||
void MirrorPadInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const PadParameter *pad_param,
|
||||
int begin, int end) {
|
||||
void MirrorPadInt8(const int8_t *input_data, int8_t *output_data, const int32_t *input_shape,
|
||||
const PadParameter *pad_param, int begin, int end) {
|
||||
int i = 0;
|
||||
for (i = begin; i < end; ++i) {
|
||||
output_data[i] = input_data[GetInputFlattenIndexInt8(i, input_shape, pad_param)];
|
||||
|
|
|
@ -26,8 +26,8 @@ extern "C" {
|
|||
#endif
|
||||
int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
|
||||
const int32_t *paddings, const int tid, const int thread_num);
|
||||
void MirrorPadInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape, const PadParameter *pad_param,
|
||||
int begin, int end);
|
||||
void MirrorPadInt8(const int8_t *input_data, int8_t *output_data, const int32_t *input_shape,
|
||||
const PadParameter *pad_param, int begin, int end);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -26,7 +26,7 @@ const double dNormalizer = 0x1p54;
|
|||
const int dNormalizerBias = 54;
|
||||
const int iMantissaBits = 31;
|
||||
|
||||
void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int *right_shift) {
|
||||
void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift) {
|
||||
if (quantized_multiplier == NULL || right_shift == NULL) {
|
||||
return;
|
||||
}
|
||||
|
@ -35,8 +35,8 @@ void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantiz
|
|||
*right_shift = -shift;
|
||||
}
|
||||
|
||||
void QuantizeRoundParameterWithDoublePrecision(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,
|
||||
int *right_shift) {
|
||||
void QuantizeRoundParameterWithDoublePrecision(double double_multiplier, int32_t *quantized_multiplier,
|
||||
int32_t *left_shift, int32_t *right_shift) {
|
||||
int shift = 0;
|
||||
QuantizeMultiplierSmallerThanOne(double_multiplier, quantized_multiplier, &shift);
|
||||
shift = -shift;
|
||||
|
@ -49,8 +49,8 @@ void QuantizeRoundParameterWithDoublePrecision(double double_multiplier, int32_t
|
|||
}
|
||||
}
|
||||
|
||||
void QuantizeRoundParameterWithSinglePrecision(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,
|
||||
int *right_shift) {
|
||||
void QuantizeRoundParameterWithSinglePrecision(double double_multiplier, int32_t *quantized_multiplier,
|
||||
int32_t *left_shift, int32_t *right_shift) {
|
||||
int shift = 0;
|
||||
const uint32_t scale_bits = (uint32_t)(double_multiplier);
|
||||
/* multiplier is in[0x40000000, 0x7FFFFF80] range */
|
||||
|
@ -74,7 +74,8 @@ uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp) { return roun
|
|||
|
||||
int32_t QuantizeToInt8(float real_value, float scale, int32_t zp) { return round(real_value / scale + zp); }
|
||||
|
||||
void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi) {
|
||||
void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int32_t *mini,
|
||||
int32_t *maxi) {
|
||||
int32_t min = INT8_MIN;
|
||||
int32_t max = INT8_MAX;
|
||||
int32_t quantized_zero = QuantizeToInt8(0, scale, zp);
|
||||
|
@ -108,7 +109,7 @@ void Dequantize(const int8_t *input_data, int length, float scale, int zero_poin
|
|||
}
|
||||
}
|
||||
|
||||
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) {
|
||||
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int32_t *shift) {
|
||||
if (quantized_multiplier == NULL || shift == NULL) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -51,10 +51,10 @@ typedef struct ConcatQuantArg {
|
|||
} ConcatQuantArg;
|
||||
|
||||
typedef struct PreluQuantArg {
|
||||
int *input_sizes_;
|
||||
int32_t *input_sizes_;
|
||||
int output_size_;
|
||||
int **input_shapes_;
|
||||
int *output_shape_;
|
||||
int32_t **input_shapes_;
|
||||
int32_t *output_shape_;
|
||||
size_t input_num_;
|
||||
size_t output_dim_;
|
||||
float alpha_;
|
||||
|
@ -91,7 +91,7 @@ typedef struct GatherQuantArg {
|
|||
|
||||
typedef struct DynamicGatherQuantArg {
|
||||
float *scale_in_;
|
||||
int *zp_in_;
|
||||
int32_t *zp_in_;
|
||||
} DynamicGatherQuantArg;
|
||||
|
||||
typedef struct SoftmaxQuantArg {
|
||||
|
@ -164,8 +164,8 @@ typedef struct LeakyReluQuantArg {
|
|||
PreluQuantArg quant_arg;
|
||||
float slope_;
|
||||
int64_t axis_;
|
||||
int *in_shape_;
|
||||
int *out_shape_;
|
||||
int32_t *in_shape_;
|
||||
int32_t *out_shape_;
|
||||
int input_dim_;
|
||||
int element_num;
|
||||
} LeakyReluQuantArg;
|
||||
|
@ -196,21 +196,22 @@ typedef struct ResizeFloatScaleQuantArg {
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
|
||||
void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int32_t *shift);
|
||||
|
||||
void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int *right_shift);
|
||||
void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift);
|
||||
|
||||
void QuantizeRoundParameterWithDoublePrecision(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,
|
||||
int *right_shift);
|
||||
void QuantizeRoundParameterWithDoublePrecision(double double_multiplier, int32_t *quantized_multiplier,
|
||||
int32_t *left_shift, int32_t *right_shift);
|
||||
|
||||
void QuantizeRoundParameterWithSinglePrecision(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,
|
||||
int *right_shift);
|
||||
void QuantizeRoundParameterWithSinglePrecision(double double_multiplier, int32_t *quantized_multiplier,
|
||||
int32_t *left_shift, int32_t *right_shift);
|
||||
|
||||
uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp);
|
||||
|
||||
int32_t QuantizeToInt8(float real_value, float scale, int32_t zp);
|
||||
|
||||
void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi);
|
||||
void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int32_t *mini,
|
||||
int32_t *maxi);
|
||||
// quantize from float to int8
|
||||
void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data);
|
||||
|
||||
|
|
|
@ -156,8 +156,8 @@ int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
|
||||
const int *output_shape, const bool align_corners, int tid, int thread_num) {
|
||||
int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int32_t *input_shape,
|
||||
const int32_t *output_shape, const bool align_corners, int tid, int thread_num) {
|
||||
int batch, y, x, c;
|
||||
c = output_shape[3];
|
||||
int in_h, in_w, new_height, new_width;
|
||||
|
@ -195,8 +195,8 @@ void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32
|
|||
*nearest = *nearest < in_size ? *nearest : in_size - 1;
|
||||
}
|
||||
|
||||
int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape,
|
||||
const int *output_shape, const bool align_corners, const QuantMulArg *multiplier,
|
||||
int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int32_t *input_shape,
|
||||
const int32_t *output_shape, const bool align_corners, const QuantMulArg *multiplier,
|
||||
const QuantArg *quant_in, const QuantArg *quant_out, int tid, int thread_num) {
|
||||
const int base_offset = 20;
|
||||
int32_t batch, y, x, c;
|
||||
|
|
|
@ -34,11 +34,11 @@ int ResizeBilinearWithFloatScaleInt8(const int8_t *input_ptr, int8_t *output_ptr
|
|||
int out_h, int out_w, int channel, int index, int count,
|
||||
ResizeFloatScaleQuantArg quant_arg);
|
||||
|
||||
int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int *input_shape,
|
||||
const int *output_shape, const bool align_corners, int tid, int thread_num);
|
||||
int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_data, const int32_t *input_shape,
|
||||
const int32_t *output_shape, const bool align_corners, int tid, int thread_num);
|
||||
|
||||
int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int *input_shape,
|
||||
const int *output_shape, const bool align_corners, const QuantMulArg *multiplier,
|
||||
int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, const int32_t *input_shape,
|
||||
const int32_t *output_shape, const bool align_corners, const QuantMulArg *multiplier,
|
||||
const QuantArg *quant_in, const QuantArg *quant_out, int tid, int thread_num);
|
||||
|
||||
void ComputeNearestNeighborInt(const int32_t pos, const int in_size, const int32_t new_size, const bool align_corners,
|
||||
|
|
|
@ -17,11 +17,11 @@
|
|||
#include "nnacl/int8/softmax_int8.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data,
|
||||
int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int32_t *exp_data, int32_t *sum_data,
|
||||
const SoftmaxQuantArg *quant_param, const SoftmaxParameter *parameter) {
|
||||
int32_t axis = parameter->axis_;
|
||||
int n_dim = parameter->n_dim_;
|
||||
const int *input_shape = parameter->input_shape_;
|
||||
const int32_t *input_shape = parameter->input_shape_;
|
||||
int axis_shape_size = input_shape[axis];
|
||||
|
||||
int inner_size = 1;
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int *exp_data, int *sum_data,
|
||||
int SoftmaxInt8(const int8_t *input_ptr, int8_t *output_ptr, int count, int32_t *exp_data, int32_t *sum_data,
|
||||
const SoftmaxQuantArg *quant_param, const SoftmaxParameter *parameter);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
#include "nnacl/int8/space_to_batch_int8.h"
|
||||
#include "nnacl/common_func.h"
|
||||
|
||||
void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape,
|
||||
const int *out_shape) {
|
||||
void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int32_t *block_sizes, const int32_t *in_shape,
|
||||
const int32_t *out_shape) {
|
||||
int out_dim0 = out_shape[0];
|
||||
int out_dim1 = out_shape[1];
|
||||
int out_dim2 = out_shape[2];
|
||||
|
|
|
@ -22,8 +22,8 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape,
|
||||
const int *out_shape);
|
||||
void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int32_t *block_sizes, const int32_t *in_shape,
|
||||
const int32_t *out_shape);
|
||||
void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, SpaceToBatchParameter *param, int32_t zp);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -19,14 +19,14 @@
|
|||
#include <string.h>
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int Int8DoSplit(const int8_t *in_data, int8_t **out_data, const int *input_shape, int offset, int num_unit,
|
||||
int Int8DoSplit(const int8_t *in_data, int8_t **out_data, const int32_t *input_shape, int offset, int num_unit,
|
||||
const SplitParameter *param) {
|
||||
if (in_data == NULL || out_data == NULL) {
|
||||
return NNACL_ERR;
|
||||
}
|
||||
const int num_split = param->num_split_;
|
||||
const int *split_sizes = param->split_sizes_;
|
||||
const int *strides = param->strides_;
|
||||
const int32_t *split_sizes = param->split_sizes_;
|
||||
const int32_t *strides = param->strides_;
|
||||
const int split_dim = param->split_dim_;
|
||||
int in_stride = strides[split_dim];
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int Int8DoSplit(const int8_t *in_data, int8_t **out_data, const int *input_shape, int offset, int num_unit,
|
||||
int Int8DoSplit(const int8_t *in_data, int8_t **out_data, const int32_t *input_shape, int offset, int num_unit,
|
||||
const SplitParameter *split_param);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, in
|
|||
}
|
||||
|
||||
void SubInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
|
||||
const SubQuantArg *para, int *index) {
|
||||
const SubQuantArg *para, int32_t *index) {
|
||||
int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_);
|
||||
int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_);
|
||||
int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_);
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
*/
|
||||
|
||||
#include "nnacl/int8/transpose_int8.h"
|
||||
void TransposeDim2Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
|
||||
const int *perm, const int *output_shape) {
|
||||
void TransposeDim2Int8(const int8_t *in_data, int8_t *out_data, const int32_t *strides, const int32_t *out_strides,
|
||||
const int32_t *perm, const int32_t *output_shape) {
|
||||
const int stride0 = strides[perm[0]];
|
||||
const int stride1 = strides[perm[1]];
|
||||
const int output0 = output_shape[0];
|
||||
|
@ -31,8 +31,8 @@ void TransposeDim2Int8(const int8_t *in_data, int8_t *out_data, const int *strid
|
|||
return;
|
||||
}
|
||||
|
||||
void TransposeDim3Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
|
||||
const int *perm, const int *output_shape) {
|
||||
void TransposeDim3Int8(const int8_t *in_data, int8_t *out_data, const int32_t *strides, const int32_t *out_strides,
|
||||
const int32_t *perm, const int32_t *output_shape) {
|
||||
const int stride0 = strides[perm[0]];
|
||||
const int stride1 = strides[perm[1]];
|
||||
const int stride2 = strides[perm[2]];
|
||||
|
@ -54,8 +54,8 @@ void TransposeDim3Int8(const int8_t *in_data, int8_t *out_data, const int *strid
|
|||
}
|
||||
}
|
||||
|
||||
void TransposeDim4Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
|
||||
const int *perm, const int *output_shape) {
|
||||
void TransposeDim4Int8(const int8_t *in_data, int8_t *out_data, const int32_t *strides, const int32_t *out_strides,
|
||||
const int32_t *perm, const int32_t *output_shape) {
|
||||
const int stride0 = strides[perm[0]];
|
||||
const int stride1 = strides[perm[1]];
|
||||
const int stride2 = strides[perm[2]];
|
||||
|
@ -86,8 +86,8 @@ void TransposeDim4Int8(const int8_t *in_data, int8_t *out_data, const int *strid
|
|||
}
|
||||
}
|
||||
|
||||
void TransposeDim5Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
|
||||
const int *perm, const int *output_shape) {
|
||||
void TransposeDim5Int8(const int8_t *in_data, int8_t *out_data, const int32_t *strides, const int32_t *out_strides,
|
||||
const int32_t *perm, const int32_t *output_shape) {
|
||||
const int stride0 = strides[perm[0]];
|
||||
const int stride1 = strides[perm[1]];
|
||||
const int stride2 = strides[perm[2]];
|
||||
|
@ -125,8 +125,8 @@ void TransposeDim5Int8(const int8_t *in_data, int8_t *out_data, const int *strid
|
|||
}
|
||||
}
|
||||
|
||||
void TransposeDim6Int8(const int8_t *in_data, int8_t *out_data, const int *strides, const int *out_strides,
|
||||
const int *perm, const int *output_shape) {
|
||||
void TransposeDim6Int8(const int8_t *in_data, int8_t *out_data, const int32_t *strides, const int32_t *out_strides,
|
||||
const int32_t *perm, const int32_t *output_shape) {
|
||||
const int stride0 = strides[perm[0]];
|
||||
const int stride1 = strides[perm[1]];
|
||||
const int stride2 = strides[perm[2]];
|
||||
|
@ -172,16 +172,16 @@ void TransposeDim6Int8(const int8_t *in_data, int8_t *out_data, const int *strid
|
|||
}
|
||||
}
|
||||
|
||||
int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
|
||||
int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int32_t *output_shape,
|
||||
const TransposeParameter *transpose_param) {
|
||||
NNACL_CHECK_NULL_RETURN_ERR(in_data);
|
||||
NNACL_CHECK_NULL_RETURN_ERR(out_data);
|
||||
NNACL_CHECK_NULL_RETURN_ERR(output_shape);
|
||||
NNACL_CHECK_NULL_RETURN_ERR(transpose_param);
|
||||
|
||||
const int *perm = transpose_param->perm_;
|
||||
const int *strides = transpose_param->strides_;
|
||||
const int *out_strides = transpose_param->out_strides_;
|
||||
const int32_t *perm = transpose_param->perm_;
|
||||
const int32_t *strides = transpose_param->strides_;
|
||||
const int32_t *out_strides = transpose_param->out_strides_;
|
||||
const int num_axes = transpose_param->num_axes_;
|
||||
|
||||
// check if transpose is needed
|
||||
|
@ -221,16 +221,16 @@ int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_s
|
|||
return NNACL_OK;
|
||||
}
|
||||
|
||||
void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
|
||||
void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int32_t *output_shape,
|
||||
const TransposeParameter *transpose_param, int task_id, int thread_num) {
|
||||
NNACL_CHECK_NULL_RETURN_VOID(in_data);
|
||||
NNACL_CHECK_NULL_RETURN_VOID(out_data);
|
||||
NNACL_CHECK_NULL_RETURN_VOID(output_shape);
|
||||
NNACL_CHECK_NULL_RETURN_VOID(transpose_param);
|
||||
NNACL_CHECK_ZERO_RETURN(thread_num);
|
||||
const int *perm = transpose_param->perm_;
|
||||
const int *strides = transpose_param->strides_;
|
||||
const int *out_strides = transpose_param->out_strides_;
|
||||
const int32_t *perm = transpose_param->perm_;
|
||||
const int32_t *strides = transpose_param->strides_;
|
||||
const int32_t *out_strides = transpose_param->out_strides_;
|
||||
int num_axes = transpose_param->num_axes_;
|
||||
size_t data_size = (size_t)((*out_strides) * output_shape[0]);
|
||||
size_t offset_size = UP_DIV(data_size, thread_num);
|
||||
|
|
|
@ -25,9 +25,9 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
|
||||
int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int32_t *output_shape,
|
||||
const TransposeParameter *transpose_param);
|
||||
void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *output_shape,
|
||||
void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int32_t *output_shape,
|
||||
const TransposeParameter *transpose_param, int task_id, int thread_num);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int *dst, int row_4, int col_4, int deep_16,
|
||||
const int *input_sum, const int *bias);
|
||||
typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int32_t *dst, int row_4, int col_4, int deep_16,
|
||||
const int32_t *input_sum, const int32_t *bias);
|
||||
|
||||
typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
|
||||
size_t stride, const int32_t *input_sum, const int32_t *bias,
|
||||
|
@ -31,7 +31,7 @@ typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst
|
|||
size_t stride, const int32_t *input_sum, const int32_t *bias,
|
||||
const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
|
||||
int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
|
||||
const int *filter_zp);
|
||||
const int32_t *filter_zp);
|
||||
|
||||
typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2, OutType_NC4HW4 = 3 } OutType;
|
||||
|
||||
|
|
|
@ -215,6 +215,9 @@ endif()
|
|||
if(PLATFORM_X86_64)
|
||||
add_compile_definitions(LINUX_RUNTIME)
|
||||
endif()
|
||||
if(PLATFORM_MCU)
|
||||
add_compile_definitions(ENABLE_MCU)
|
||||
endif()
|
||||
if(TOOLCHAIN_NAME STREQUAL "himix200")
|
||||
set(TARGET_HIMIX on)
|
||||
set(TARGET_HIMIX200 on)
|
||||
|
@ -518,6 +521,10 @@ elseif(WIN32)
|
|||
else()
|
||||
set(RUNTIME_COMPONENT_NAME "win-x64")
|
||||
endif()
|
||||
elseif(PLATFORM_MCU)
|
||||
if(TOOLCHAIN_NAME STREQUAL "cortex-m7")
|
||||
set(RUNTIME_COMPONENT_NAME "cortex-m7")
|
||||
endif()
|
||||
else()
|
||||
if((CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") AND MSLITE_ENABLE_ACL)
|
||||
set(RUNTIME_COMPONENT_NAME "linux-aarch64")
|
||||
|
@ -532,10 +539,11 @@ set(CORE_DIR ${TOP_DIR}/mindspore/core)
|
|||
set(CCSRC_DIR ${TOP_DIR}/mindspore/ccsrc)
|
||||
set(NNACL_DIR ${CCSRC_DIR}/plugin/device/cpu/kernel/nnacl)
|
||||
|
||||
if(TOOLCHAIN_NAME STREQUAL "cortex-m7")
|
||||
set(RUNTIME_COMPONENT_NAME "cortex-m7")
|
||||
if(PLATFORM_MCU)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-incompatible-pointer-types")
|
||||
add_subdirectory(${NNACL_DIR} build/nnacl)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter/micro/cmake/cortex-m/ build)
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter/micro/cmake/cortex-m/package.cmake)
|
||||
include(${TOP_DIR}/cmake/package_lite.cmake)
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
|
|
@ -269,6 +269,8 @@ build_lite() {
|
|||
MSLITE_COMPILE_TWICE=ON
|
||||
elif [[ ("${MSLITE_ENABLE_NNIE}" == "on" || "${MSLITE_REGISTRY_DEVICE}" == "Hi3516D") && "${local_lite_platform}" == "x86_64" ]]; then
|
||||
MSLITE_REGISTRY_DEVICE=Hi3516D
|
||||
elif [[ "${MSLITE_MICRO_PLATFORM}" == cortex-m* && "${local_lite_platform}" == "x86_64" ]]; then
|
||||
TOOLCHAIN_NAME="cortex-m7"
|
||||
fi
|
||||
|
||||
machine=`uname -m`
|
||||
|
@ -338,24 +340,25 @@ build_lite() {
|
|||
fi
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=on"
|
||||
fi
|
||||
elif [[ "$(uname)" == "Darwin" ]]; then
|
||||
pkg_name=mindspore-lite-${VERSION_STR}-ios-simulator
|
||||
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake
|
||||
LITE_CMAKE_ARGS=`echo $LITE_CMAKE_ARGS | sed 's/-DCMAKE_BUILD_TYPE=Debug/-DCMAKE_BUILD_TYPE=Release/g'`
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DPLATFORM=SIMULATOR64 -DPLATFORM_ARM64=off -DENABLE_NEON=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off -DMSLITE_ENABLE_NPU=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_MINDDATA_IMPLEMENT=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off -DMSLITE_ENABLE_CONVERTER=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -G Xcode .."
|
||||
else
|
||||
if [ "$(uname)" == "Darwin" ]; then
|
||||
pkg_name=mindspore-lite-${VERSION_STR}-ios-simulator
|
||||
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake
|
||||
LITE_CMAKE_ARGS=`echo $LITE_CMAKE_ARGS | sed 's/-DCMAKE_BUILD_TYPE=Debug/-DCMAKE_BUILD_TYPE=Release/g'`
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DPLATFORM=SIMULATOR64 -DPLATFORM_ARM64=off -DENABLE_NEON=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off -DMSLITE_ENABLE_NPU=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_MINDDATA_IMPLEMENT=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off -DMSLITE_ENABLE_CONVERTER=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -G Xcode .."
|
||||
if [[ "${machine}" == "aarch64" ]]; then
|
||||
echo "Use the '-I arm64' command when compiling MindSpore Lite on an aarch64 architecture system."
|
||||
exit 1
|
||||
fi
|
||||
if [[ "${TOOLCHAIN_NAME}" == "cortex-m7" ]]; then
|
||||
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/mindspore/lite/cmake/cortex-m7.toolchain.cmake
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=cortex-m7"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DPLATFORM_MCU=on"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_MINDDATA_IMPLEMENT=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off -DMSLITE_ENABLE_TOOLS=off"
|
||||
else
|
||||
if [[ "${machine}" == "aarch64" ]]; then
|
||||
echo "Use the '-I arm64' command when compiling MindSpore Lite on an aarch64 architecture system."
|
||||
exit 1
|
||||
fi
|
||||
if [[ "${MSLITE_MICRO_PLATFORM}" == cortex-m* ]]; then
|
||||
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/mindspore/lite/cmake/cortex-m7.toolchain.cmake
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=cortex-m7"
|
||||
fi
|
||||
# CPU : Linux-x86_64
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_MINDDATA_IMPLEMENT=lite_cv"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DPLATFORM_X86_64=on"
|
||||
|
|
|
@ -20,8 +20,8 @@ endif()
|
|||
set(CMAKE_C_COMPILER arm-none-eabi-gcc)
|
||||
set(CMAKE_CXX_COMPILER arm-none-eabi-g++)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "-mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_C_FLAGS "-mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard ${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "-mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard -specs=nosys.specs -specs=nano.specs")
|
||||
set(CMAKE_C_FLAGS "-mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard -specs=nosys.specs -specs=nano.specs")
|
||||
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
|
||||
|
|
|
@ -6,35 +6,8 @@ string(REPLACE "-Werror" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
|||
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
set(MICRO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)
|
||||
|
||||
include_directories(${NNACL_DIR}/..)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-attributes -fdata-sections -ffunction-sections")
|
||||
|
||||
file(GLOB KERNEL_SRC
|
||||
${NNACL_DIR}/*.c
|
||||
${NNACL_DIR}/fp32/*.c
|
||||
${NNACL_DIR}/infer/*.c
|
||||
${NNACL_DIR}/base/*.c
|
||||
${NNACL_DIR}/fp32_grad/*.c
|
||||
${NNACL_DIR}/kernel/*.c
|
||||
${NNACL_DIR}/experimental/*.c
|
||||
${NNACL_DIR}/int8/*.c
|
||||
${NNACL_DIR}/fp32_sparse/*.c
|
||||
${NNACL_DIR}/infer/string/*.c
|
||||
${NNACL_DIR}/infer/control/*.c
|
||||
)
|
||||
|
||||
list(REMOVE_ITEM KERNEL_SRC ${NNACL_DIR}/infer/shape_fusion_infer.c)
|
||||
|
||||
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
|
||||
add_library(nnacl_static STATIC ${KERNEL_SRC})
|
||||
set_target_properties(nnacl_static PROPERITES OUTPUT_NAME "nnacl")
|
||||
target_compile_options(nnacl_static PRIVATE -fPIC)
|
||||
|
||||
target_link_options(nnacl_static PRIVATE -Wl,-z,relro,-z,now,-z,noexecstack)
|
||||
|
||||
include(${TOP_DIR}/cmake/utils.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/cmsis.cmake)
|
||||
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
include(CMakePackageConfigHelpers)
|
||||
|
||||
set(RUNTIME_PKG_NAME ${PKG_NAME_PREFIX}-${RUNTIME_COMPONENT_NAME})
|
||||
|
||||
include(${TOP_DIR}/cmake/package_micro.cmake)
|
||||
|
||||
__install_micro_wrapper()
|
||||
__install_micro_codegen()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
set(CPACK_GENERATOR ZIP)
|
||||
else()
|
||||
set(CPACK_GENERATOR TGZ)
|
||||
endif()
|
||||
|
||||
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
|
||||
set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME})
|
||||
set(CPACK_PACKAGE_FILE_NAME ${PKG_NAME_PREFIX})
|
||||
|
||||
if(WIN32)
|
||||
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output)
|
||||
else()
|
||||
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output/tmp)
|
||||
endif()
|
||||
set(CPACK_PACKAGE_CHECKSUM SHA256)
|
||||
include(CPack)
|
Loading…
Reference in New Issue