forked from mindspore-Ecosystem/mindspore
[MSLITE] npu innerContext comment bug
parent a34d737858
commit abb35e6688
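The hunks below const-qualify pointer and array parameters that the nnacl slice, split, unstack, add, Conv3x3, deconv, matmul, and pack kernels only read, and drop a stale commented-out line in InnerContext::IsNpuEnabled(). As a quick illustration of what the qualifier buys, here is a minimal, self-contained C++ sketch; DemoParameter and SumDims are hypothetical names for illustration, not real nnacl types:

#include <cstdint>

// Hypothetical stand-in for a nnacl parameter struct; not a real MindSpore type.
struct DemoParameter {
  int32_t shape_[8];
  int32_t param_length_;
};

// `const DemoParameter *` promises read-only access, matching the new signatures below.
static int32_t SumDims(const DemoParameter *param) {
  int32_t sum = 0;
  for (int i = 0; i < param->param_length_; ++i) {
    sum += param->shape_[i];
    // param->shape_[i] = 0;  // would not compile: write through a pointer-to-const
  }
  return sum;
}

int main() {
  DemoParameter p = {{1, 2, 3, 4, 5, 6, 7, 8}, 8};
  return SumDims(&p) == 36 ? 0 : 1;  // exits 0 on success
}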
@@ -43,7 +43,7 @@ void PadSliceParameterTo8D(SliceParameter *param) {
   param->param_length_ = DIMENSION_8D;
 }
 
-void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size) {
+void DoSlice(const void *input, void *output, const SliceParameter *param, int thread_id, int data_size) {
   int8_t *int8_in = (int8_t *)input;
   int8_t *int8_out = (int8_t *)output;
 
@@ -94,14 +94,14 @@ void DoSlice(const void *input, void *output, SliceParameter *param, int thread_
   }
 }
 
-static bool WhetherCopyByAxis(int begin[], int end[], const int shape[], int dim) {
+static bool WhetherCopyByAxis(const int begin[], const int end[], const int shape[], int dim) {
   for (int i = dim + 1; i < DIMENSION_8D; ++i) {
     if (begin[i] != 0 || end[i] != shape[i]) return false;
   }
   return true;
 }
 
-void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size) {
+void DoSliceNoParallel(const void *input, void *output, const SliceParameter *param, int data_size) {
   int8_t *int8_in = (int8_t *)input;
   int8_t *int8_out = (int8_t *)output;
 
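WhetherCopyByAxis now takes const int begin[] and const int end[]; in C and C++, an array parameter adjusts to a pointer, so these are pointer-to-const parameters like the rest of the commit. A tiny self-contained check (the names here are illustrative, not nnacl's):

#include <cstdio>

// `const int v[]` in a parameter list adjusts to `const int *v`,
// so the new WhetherCopyByAxis signature is a pointer-to-const like the others.
static bool AllEqual(const int a[], const int b[], int n) {
  for (int i = 0; i < n; ++i) {
    if (a[i] != b[i]) return false;
  }
  return true;
}

int main() {
  const int x[3] = {1, 2, 3};
  const int y[3] = {1, 2, 3};
  std::printf("%d\n", AllEqual(x, y, 3));  // prints 1
  return 0;
}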
@@ -25,8 +25,8 @@ extern "C" {
 #endif
 void PadSliceParameterTo8D(SliceParameter *param);
 
-void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size);
-void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size);
+void DoSlice(const void *input, void *output, const SliceParameter *param, int thread_id, int data_size);
+void DoSliceNoParallel(const void *input, void *output, const SliceParameter *param, int data_size);
 #ifdef __cplusplus
 }
 #endif
@@ -20,12 +20,12 @@
 #include "nnacl/errorcode.h"
 
 int DoSplit(void *in_data, void **out_data, const int *input_shape, int offset, int num_unit,
-            SplitParameter *split_param, int data_size) {
+            const SplitParameter *split_param, int data_size) {
   int8_t *int8_in = (int8_t *)in_data;
 
   int num_split = split_param->num_split_;
   int *split_sizes = split_param->split_sizes_;
-  int *strides = split_param->strides_;
+  const int *strides = split_param->strides_;
   int split_dim = split_param->split_dim_;
   int in_stride = strides[split_dim];
 
@@ -24,7 +24,7 @@
 extern "C" {
 #endif
 int DoSplit(void *in_data, void **out_data, const int *input_shape, int offset, int num_unit,
-            SplitParameter *split_param, int data_size);
+            const SplitParameter *split_param, int data_size);
 #ifdef __cplusplus
 }
 #endif
@@ -18,7 +18,7 @@
 #include <string.h>
 #include "nnacl/errorcode.h"
 
-int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, SplitWithOverlapParameter *param,
+int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, const SplitWithOverlapParameter *param,
                               const int *start_indices, const int *end_indices) {
   if (in_data == NULL || out_data == NULL) {
     return NNACL_NULL_PTR;
@@ -23,7 +23,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, SplitWithOverlapParameter *param,
+int DoSplitWithOverlapParallel(char *in_data, char **out_data, int slice_idx, const SplitWithOverlapParameter *param,
                               const int *start_indices, const int *end_indices);
 #ifdef __cplusplus
 }
@@ -16,7 +16,7 @@
 
 #include "nnacl/base/unstack_base.h"
 
-void Unstack(const void *input, void **output, UnstackParameter *para, int data_size) {
+void Unstack(const void *input, void **output, const UnstackParameter *para, int data_size) {
   const int8_t *in_addr = (int8_t *)input;
   for (int j = 0; j < para->num_; j++) {
     int8_t *out_addr = (int8_t *)output[j];
@@ -24,7 +24,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void Unstack(const void *input, void **output, UnstackParameter *para, int data_size);
+void Unstack(const void *input, void **output, const UnstackParameter *para, int data_size);
 #ifdef __cplusplus
 }
 #endif
@@ -78,7 +78,7 @@ void AddInt8OutputRounding(int32x4_t *out1, int32x4_t *out2, int32x4_t *out3, in
 }
 #endif
 
-void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, AddQuantParameter *params) {
+void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, const AddQuantParameter *params) {
   int in0_left_shift = (1 << params->left_shift_) * (1 << params->in0_args_.left_shift_);
   int in1_left_shift = (1 << params->left_shift_) * (1 << params->in1_args_.left_shift_);
   int index = 0;
@@ -50,7 +50,7 @@ typedef struct AddQuantParameter {
 extern "C" {
 #endif
 
-void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, AddQuantParameter *params);
+void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int size, const AddQuantParameter *params);
 
 void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, int size,
                 const AddQuantParameter *params, const AddQuantQrgs *ptr_args, const AddQuantQrgs *ele_args);
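The AddInt8 body seen in the previous hunk folds two power-of-two rescaling shifts into one multiplier: since both factors are powers of two, (1 << a) * (1 << b) == 1 << (a + b). A small self-contained check; the shift values below are made up for illustration, not taken from real quantization parameters:

#include <cassert>

int main() {
  const int left_shift = 20;     // hypothetical AddQuantParameter::left_shift_
  const int in0_left_shift = 2;  // hypothetical in0_args_.left_shift_
  const int combined = (1 << left_shift) * (1 << in0_left_shift);
  assert(combined == 1 << (left_shift + in0_left_shift));  // 2^20 * 2^2 == 2^22
  return 0;
}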
@@ -867,9 +867,9 @@ void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, in
 }
 
 // int8 convolution 3x3
-void Conv3x3Int8(int16_t *input_data, int16_t *transed_weight, const int32_t *bias_data, int8_t *output_data,
-                 int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer, int8_t *tmp_out,
-                 int task_id, const ConvParameter *conv_param) {
+void Conv3x3Int8(const int16_t *input_data, const int16_t *transed_weight, const int32_t *bias_data,
+                 int8_t *output_data, int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer,
+                 int8_t *tmp_out, int task_id, const ConvParameter *conv_param) {
   int ic8 = UP_DIV(conv_param->input_channel_, C8NUM);
   int out_w_block = UP_DIV(conv_param->output_w_, OUPUT_UNIT);
   int out_h_block = UP_DIV(conv_param->output_h_, OUPUT_UNIT);
@@ -37,9 +37,9 @@ extern "C" {
 void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weight, int iC8, int output_channel,
                                 int kernel_plane);
 
-void Conv3x3Int8(int16_t *input_data, int16_t *transed_weight, const int32_t *bias_data, int8_t *output_data,
-                 int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer, int8_t *tmp_out,
-                 int task_id, const ConvParameter *conv_param);
+void Conv3x3Int8(const int16_t *input_data, const int16_t *transed_weight, const int32_t *bias_data,
+                 int8_t *output_data, int16_t *tile_buffer, int16_t *block_unit_buffer, int32_t *tmp_dst_buffer,
+                 int8_t *tmp_out, int task_id, const ConvParameter *conv_param);
 
 #ifdef __cplusplus
 }
@@ -130,9 +130,9 @@ void DeConvPackInputSum(const int8_t *src, int32_t *dst, int32_t filter_zp, size
   return;
 }
 
-int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, int32_t *weight_sum, int32_t *input_sum,
-               size_t act_row, size_t act_col, size_t act_deep, ConvParameter *conv_param,
-               MATMUL_OPT_R4_FUNC matmul_func) {
+int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, const int32_t *weight_sum,
+               const int32_t *input_sum, size_t act_row, size_t act_col, size_t act_deep,
+               const ConvParameter *conv_param, MATMUL_OPT_R4_FUNC matmul_func) {
   if (matmul_func != NULL) {
     matmul_func(input, weight, output, act_row, act_col, act_deep, input_sum, weight_sum);
   } else {
@@ -34,9 +34,9 @@ void DeConvPackInputSum(const int8_t *src, int32_t *dst, int32_t filter_zp, size
 void DeConvWeightTransInt8(const int8_t *src, int8_t *dst, int input_channel, int output_channel, int plane,
                            bool support_optimize_);
 
-int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, int32_t *weight_sum, int32_t *input_sum,
-               size_t act_row, size_t act_col, size_t act_deep, ConvParameter *conv_param,
-               MATMUL_OPT_R4_FUNC matmul_func);
+int DeConvInt8(const int8_t *input, const int8_t *weight, int32_t *output, const int32_t *weight_sum,
+               const int32_t *input_sum, size_t act_row, size_t act_col, size_t act_deep,
+               const ConvParameter *conv_param, MATMUL_OPT_R4_FUNC matmul_func);
 int DeConvPostInt8(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
                    ConvParameter *conv_param, bool support_optimize);
 #ifdef __cplusplus
@@ -17,7 +17,7 @@
 #include "nnacl/int8/matmul_int8.h"
 #include "nnacl/int8/fixed_point.h"
 
-void RowMajor2Row2x16MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
+void RowMajor2Row2x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
   int col16 = UP_ROUND(col, C16NUM);
   for (int r = 0; r < row; r++) {
     int rd2 = r / C2NUM;
@@ -32,7 +32,7 @@ void RowMajor2Row2x16MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int co
   }
 }
 
-void RowMajor2Col16x2MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
+void RowMajor2Col16x2MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col) {
   int row16 = UP_ROUND(row, C16NUM);
   int stride = sizeof(int8_t) * C16NUM * C2NUM;
   for (int r = 0; r < row; ++r) {
@@ -60,9 +60,9 @@ void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, i
   }
 }
 
-void MatrixPack4x16UnitInt8(int8_t *src, int8_t *dst, int row, int col, int stride) {
+void MatrixPack4x16UnitInt8(const int8_t *src, int8_t *dst, int row, int col, int stride) {
   for (int r = 0; r < row; r++) {
-    int8_t *src_r = src + r * stride;
+    const int8_t *src_r = src + r * stride;
     int8_t *dst_r = dst + r * C16NUM;
     memcpy(dst_r, src_r, col * sizeof(int8_t));
   }
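The MatrixPack4x16UnitInt8 hunk shows the follow-on cost of const-qualifying a parameter: locals derived from it must become pointer-to-const too, or the build breaks, which is why src_r changed along with src. A minimal sketch of the rule (names are illustrative, not the nnacl ones):

#include <cstdint>

// Pointer arithmetic on a pointer-to-const yields another pointer-to-const.
static const int8_t *RowAt(const int8_t *src, int r, int stride) {
  // int8_t *bad = src;  // would not compile: discards the const qualifier
  return src + r * stride;  // OK: const-ness is preserved
}

int main() {
  int8_t data[8] = {0};
  return RowAt(data, 1, 4) == data + 4 ? 0 : 1;  // exits 0 on success
}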
@@ -196,9 +196,9 @@ void MatMulInt8_16x4(const int8_t *a, const int8_t *b, int *dst, int row_4, int
 }
 
 void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_16,
-                      size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                      int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
-                      bool peroc) {
+                      size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
+                      const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
+                      int32_t maxi, bool peroc) {
   /* support per-layer && weight per-channel */
   /* row4x16-major * row16x2-major => (int8)row-major*/
   for (int r = 0; r < row; r++) {
@@ -48,12 +48,12 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
 
 /* 4x16 16x2 -> 4x2 */
 /* arm32 conv1x1 */
-void RowMajor2Row2x16MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
-void RowMajor2Col16x2MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
+void RowMajor2Row2x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
+void RowMajor2Col16x2MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
 void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_16,
-                      size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                      int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
-                      bool peroc);
+                      size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
+                      const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
+                      int32_t maxi, bool peroc);
 
 /* 4x4 4x16 -> 4x16 */
 /* optimize conv1x1 */
@@ -807,7 +807,7 @@ void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_
   }
 }
 
-void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param) {
+void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, const ConvParameter *conv_param) {
   int in_batch = conv_param->input_batch_;
   int in_channel = conv_param->input_channel_;
   int in_h = conv_param->input_h_;
@@ -961,7 +961,7 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter
 }
 
 void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
-                             ConvQuantArg *quant_qrg) {
+                             const ConvQuantArg *quant_qrg) {
   int weight_zp = quant_qrg->filter_quant_args_[0].zp_;
   for (int c = 0; c < channel; c++) {
     if (quant_qrg->per_channel_ & FILTER_PER_CHANNEL) {
@@ -980,7 +980,7 @@ void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight
 }
 
 void PackDeconvDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
-                                   ConvQuantArg *quant_qrg) {
+                                   const ConvQuantArg *quant_qrg) {
   int weight_zp = quant_qrg->filter_quant_args_[0].zp_;
   for (int c = 0; c < channel; c++) {
     if (quant_qrg->per_channel_ & FILTER_PER_CHANNEL) {
@@ -39,7 +39,7 @@ void PackNHWCToNCHWInt8(const void *src, void *dst, int batch, int plane, int ch
 void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, const int32_t *filter_zp,
                           const ConvParameter *conv_param);
 void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16);
-void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param);
+void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, const ConvParameter *conv_param);
 void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, const ConvParameter *conv_param);
 void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num,
                            int block_index, const int32_t *filter_zp, int32_t *input_sum,
@@ -52,9 +52,9 @@ void PreSum4x16Int8Peroc(const int8_t *src, int32_t *sum, const int32_t *zp, siz
 
 void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter *conv_param);
 void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
-                             ConvQuantArg *quant_qrg);
+                             const ConvQuantArg *quant_qrg);
 void PackDeconvDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
-                                   ConvQuantArg *quant_qrg);
+                                   const ConvQuantArg *quant_qrg);
 
 #ifdef __cplusplus
 }
@@ -205,7 +205,6 @@ bool InnerContext::IsGpuEnabled() const {
 
 bool InnerContext::IsNpuEnabled() const {
#ifdef SUPPORT_NPU
-  // return IsUserSetNpu() && npu_manager_->IsSupportNPU();
   return IsUserSetNpu();
#else
   return false;
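This last hunk is the "comment bug" of the commit title: a stale commented-out line that still mentioned npu_manager_->IsSupportNPU() is deleted, leaving the user flag as the only check when SUPPORT_NPU is defined. A minimal self-contained sketch of the resulting behavior; the stub class and user_set_npu_ member below are illustrative, the real InnerContext in MindSpore Lite has many more members:

// Build with -DSUPPORT_NPU to exercise the NPU branch.
class InnerContext {
 public:
  bool IsUserSetNpu() const { return user_set_npu_; }
  bool IsNpuEnabled() const {
#ifdef SUPPORT_NPU
    return IsUserSetNpu();  // after this commit: only the user flag is consulted
#else
    return false;           // NPU support not compiled in
#endif
  }
  bool user_set_npu_ = false;  // hypothetical stand-in for the real user setting
};

int main() {
  InnerContext ctx;
  ctx.user_set_npu_ = true;
  return ctx.IsNpuEnabled() ? 0 : 1;  // exits 0 only when built with SUPPORT_NPU
}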