commit
5ce8a53cb9
|
@ -112,9 +112,15 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() {
|
|||
|
||||
int DetectionPostProcessBaseCoder::DoCode(CoderContext *const context) {
|
||||
Collect(context,
|
||||
{"nnacl/detection_post_process_parameter.h", "nnacl/fp32/detection_post_process_fp32.h",
|
||||
"wrapper/base/detection_post_process_base_wrapper.h"},
|
||||
{"detection_post_process_fp32.c", "detection_post_process_base_wrapper.c"});
|
||||
{
|
||||
"nnacl/detection_post_process_parameter.h",
|
||||
"nnacl/fp32/detection_post_process_fp32.h",
|
||||
"wrapper/base/detection_post_process_base_wrapper.h",
|
||||
},
|
||||
{
|
||||
"detection_post_process_fp32.c",
|
||||
"detection_post_process_base_wrapper.c",
|
||||
});
|
||||
|
||||
Serializer code;
|
||||
MS_CHECK_RET_CODE(GetInputData(context, &code), "GetInputData failed");
|
||||
|
|
|
@ -43,13 +43,27 @@ int DTypeCastCoder::DoCode(CoderContext *const context) {
|
|||
TypeId input_data_type = input_tensor_->data_type();
|
||||
TypeId output_data_type = output_tensor_->data_type();
|
||||
|
||||
std::vector<std::string> asmFiles;
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/cast.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/cast.c",
|
||||
"nnacl/fp32/common_func.c",
|
||||
});
|
||||
if (target_ == kARM32A) {
|
||||
asmFiles = {"nnacl/assembly/arm32/PostFuncBiasReluC8.S", "nnacl/assembly/arm32/PostFuncBiasReluC4.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"nnacl/assembly/arm32/PostFuncBiasReluC8.S",
|
||||
"nnacl/assembly/arm32/PostFuncBiasReluC4.S",
|
||||
});
|
||||
} else if (target_ == kARM64) {
|
||||
asmFiles = {"nnacl/assembly/arm64/PostFuncBiasReluC8.S", "nnacl/assembly/arm64/PostFuncBiasReluC4.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"nnacl/assembly/arm64/PostFuncBiasReluC8.S",
|
||||
"nnacl/assembly/arm64/PostFuncBiasReluC4.S",
|
||||
});
|
||||
}
|
||||
Collect(context, {"nnacl/fp32/cast.h"}, {"nnacl/fp32/cast.c", "nnacl/fp32/common_func.c"}, asmFiles);
|
||||
Serializer code;
|
||||
if (output_data_type != kNumberTypeFloat32) {
|
||||
if (input_data_type == kNumberTypeFloat32 && output_data_type == kNumberTypeInt32) {
|
||||
|
|
|
@ -46,7 +46,13 @@ int QuantDTypeCastCoder::DoCode(CoderContext *const context) {
|
|||
: input_tensor_->quant_params().at(0);
|
||||
int num_unit_thread = input_tensor_->ElementsNum();
|
||||
|
||||
Collect(context, {"nnacl/int8/quant_dtype_cast_int8.h"}, {"quant_dtype_cast_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/quant_dtype_cast_int8.h",
|
||||
},
|
||||
{
|
||||
"quant_dtype_cast_int8.c",
|
||||
});
|
||||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) {
|
||||
|
|
|
@ -75,7 +75,13 @@ int AddInt8Coder::DoCode(CoderContext *const context) {
|
|||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
||||
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_elementwise_add_s8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{
|
||||
"arm_elementwise_add_s8.c",
|
||||
});
|
||||
|
||||
code.CodeFunction("arm_elementwise_add_s8", input1_, input2, input_1_offset_, input_1_mult_, input_1_shift_,
|
||||
input_2_offset_, input_2_mult_, input_2_shift_, left_shift_, output_tensor_, out_offset_, out_mult_,
|
||||
|
|
|
@ -39,9 +39,11 @@ int Conv2DInt8Coder::Prepare(CoderContext *const context) {
|
|||
int Conv2DInt8Coder::DoCode(CoderContext *const context) {
|
||||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
std::vector<std::string> h_files;
|
||||
std::vector<std::string> c_files;
|
||||
h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h");
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{});
|
||||
if (opt_ != Convolve_1x1_fast) {
|
||||
code.CodeFunction("memset", buffer_, 0, buffer_size_);
|
||||
}
|
||||
|
@ -49,25 +51,36 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) {
|
|||
code.CodeArray("output_mult", output_mult_, output_ch_);
|
||||
switch (opt_) {
|
||||
case Basic:
|
||||
c_files = {"arm_convolve_s8.c", "arm_nn_mat_mult_kernel_s8_s16.c", "arm_q7_to_q15_with_offset.c"};
|
||||
Collect(context, h_files, c_files);
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_convolve_s8.c",
|
||||
"arm_nn_mat_mult_kernel_s8_s16.c",
|
||||
"arm_q7_to_q15_with_offset.c",
|
||||
});
|
||||
code.CodeFunction("arm_convolve_s8", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, filter_tensor_,
|
||||
output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
|
||||
output_tensor_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
|
||||
out_activation_max_, output_x_, output_y_, buffer_);
|
||||
break;
|
||||
case Convolve_1_x_n:
|
||||
c_files = {"arm_convolve_1_x_n_s8.c", "arm_nn_mat_mul_core_1x_s8.c"};
|
||||
Collect(context, h_files, c_files);
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_convolve_1_x_n_s8.c",
|
||||
"arm_nn_mat_mul_core_1x_s8.c",
|
||||
});
|
||||
code.CodeFunction("arm_convolve_1_x_n_s8", input_tensor_, input_x_, input_ch_, input_batches_, filter_tensor_,
|
||||
output_ch_, kernel_x_, pad_x_, stride_x_, bias_tensor_, output_tensor_, "output_shift",
|
||||
"output_mult", out_offset_, input_offset_, out_activation_min_, out_activation_max_, output_x_,
|
||||
buffer_);
|
||||
break;
|
||||
case Convolve_1x1_fast:
|
||||
c_files = {"arm_convolve_1x1_s8_fast.c", "arm_nn_mat_mult_nt_t_s8.c", "arm_nn_mat_mul_core_4x_s8.c",
|
||||
"arm_nn_mat_mul_core_1x_s8.c"};
|
||||
Collect(context, h_files, c_files);
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_convolve_1x1_s8_fast.c",
|
||||
"arm_nn_mat_mult_nt_t_s8.c",
|
||||
"arm_nn_mat_mul_core_4x_s8.c",
|
||||
"arm_nn_mat_mul_core_1x_s8.c",
|
||||
});
|
||||
code.CodeFunction("arm_convolve_1x1_s8_fast", input_tensor_, input_x_, input_y_, input_ch_, input_batches_,
|
||||
filter_tensor_, output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_,
|
||||
"output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
|
||||
|
|
|
@ -38,16 +38,19 @@ int DWConvInt8Coder::DoCode(CoderContext *const context) {
|
|||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
||||
std::vector<std::string> h_files;
|
||||
std::vector<std::string> c_files;
|
||||
|
||||
h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h");
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{});
|
||||
code.CodeArray("output_shift", output_shift_, output_ch_);
|
||||
code.CodeArray("output_mult", output_mult_, output_ch_);
|
||||
switch (optimize_) {
|
||||
case Conv_3x3:
|
||||
c_files.emplace_back("arm_depthwise_conv_3x3_s8.c");
|
||||
Collect(context, h_files, c_files);
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_depthwise_conv_3x3_s8.c",
|
||||
});
|
||||
code.CodeFunction("arm_depthwise_conv_3x3_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_,
|
||||
output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_, "output_shift",
|
||||
"output_mult", output_x_, output_y_, output_offset_, input_offset_, output_activation_min_,
|
||||
|
@ -55,9 +58,11 @@ int DWConvInt8Coder::DoCode(CoderContext *const context) {
|
|||
break;
|
||||
case Conv_opt:
|
||||
// arm_depthwise_conv_s8_opt also depends on arm_depthwise_conv_s8
|
||||
c_files.emplace_back("arm_depthwise_conv_s8.c");
|
||||
c_files.emplace_back("arm_depthwise_conv_s8_opt.c");
|
||||
Collect(context, h_files, c_files);
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_depthwise_conv_s8.c",
|
||||
"arm_depthwise_conv_s8_opt.c",
|
||||
});
|
||||
code.CodeFunction("arm_depthwise_conv_s8_opt", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_,
|
||||
output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
|
||||
output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_,
|
||||
|
@ -65,8 +70,10 @@ int DWConvInt8Coder::DoCode(CoderContext *const context) {
|
|||
"NULL");
|
||||
break;
|
||||
case Basic:
|
||||
c_files.emplace_back("arm_depthwise_conv_s8.c");
|
||||
Collect(context, h_files, c_files);
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_depthwise_conv_s8.c",
|
||||
});
|
||||
code.CodeFunction("arm_depthwise_conv_s8", input_tensor_, input_x_, input_y_, input_ch_, filter_tensor_,
|
||||
output_ch_, ch_mult_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
|
||||
output_tensor_, "output_shift", "output_mult", output_x_, output_y_, output_offset_,
|
||||
|
|
|
@ -35,7 +35,14 @@ int FullConnectionInt8Coder::DoCode(CoderContext *const context) {
|
|||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
||||
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_fully_connected_s8.c", "arm_nn_vec_mat_mult_t_s8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{
|
||||
"arm_fully_connected_s8.c",
|
||||
"arm_nn_vec_mat_mult_t_s8.c",
|
||||
});
|
||||
|
||||
code.CodeFunction("arm_fully_connected_s8", input_tensor_, filter_tensor_, col_dim_, row_dim_, nb_batches_,
|
||||
input_offset_, filter_offset_, out_multiplier_, out_shift_, output_offset_, bias_tensor_,
|
||||
|
|
|
@ -60,7 +60,13 @@ int MulInt8Coder::DoCode(CoderContext *const context) {
|
|||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
||||
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_elementwise_mul_s8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{
|
||||
"arm_elementwise_mul_s8.c",
|
||||
});
|
||||
|
||||
code.CodeFunction("arm_elementwise_mul_s8", input1_, input2_, input_1_offset_, input_2_offset_, output_tensor_,
|
||||
out_offset_, out_mult_, out_shift_, out_activation_min_, out_activation_max_, block_size_);
|
||||
|
|
|
@ -42,18 +42,27 @@ int PoolingInt8Coder::DoCode(CoderContext *const context) {
|
|||
// init struct PoolingParameters
|
||||
std::string pooling_func;
|
||||
|
||||
std::vector<std::string> cFiles;
|
||||
if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) {
|
||||
cFiles = {"arm_avgpool_s8.c"};
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_avgpool_s8.c",
|
||||
});
|
||||
pooling_func = "arm_avgpool_s8";
|
||||
} else if (pooling_parameter_->pool_mode_ == PoolMode_MaxPool) {
|
||||
cFiles = {"arm_max_pool_s8.c"};
|
||||
Collect(context, {},
|
||||
{
|
||||
"arm_max_pool_s8.c",
|
||||
});
|
||||
pooling_func = "arm_max_pool_s8";
|
||||
} else {
|
||||
MS_LOG(ERROR) << "unsupported pad mode";
|
||||
return RET_ERROR;
|
||||
}
|
||||
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, cFiles);
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{});
|
||||
|
||||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
|
|
@ -69,7 +69,13 @@ int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
|
|||
Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
||||
Collect(context, {"CMSIS/NN/Include/arm_nnfunctions.h"}, {"arm_softmax_s8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"CMSIS/NN/Include/arm_nnfunctions.h",
|
||||
},
|
||||
{
|
||||
"arm_softmax_s8.c",
|
||||
});
|
||||
code.CodeFunction("arm_softmax_s8", input_tensor_, num_rows_, row_size_, mult_, shift_, diff_min_, output_tensor_);
|
||||
|
||||
MS_LOG(INFO) << "SoftMaxInt8Coder has been called";
|
||||
|
|
|
@ -33,7 +33,13 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) {
|
|||
int stride = UP_DIV(length, thread_num_);
|
||||
int count = MSMIN(stride, length - stride * task_id);
|
||||
|
||||
Collect(context, {"nnacl/fp32/activation_fp32.h"}, {"activation_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/activation_fp32.h",
|
||||
},
|
||||
{
|
||||
"activation_fp32.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
switch (activation_parameter->type_) {
|
||||
case schema::ActivationType_RELU:
|
||||
|
|
|
@ -28,7 +28,15 @@ int AddNFP32Coder::DoCode(CoderContext *const context) {
|
|||
int elements_num = input0->ElementsNum();
|
||||
|
||||
// Get Tensor Pointer
|
||||
Collect(context, {"nnacl/kernel/fp32/add_fp32.h"}, {"add_fp32.c", "arithmetic_fp32.c", "arithmetic_base.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/kernel/fp32/add_fp32.h",
|
||||
},
|
||||
{
|
||||
"add_fp32.c",
|
||||
"arithmetic_fp32.c",
|
||||
"arithmetic_base.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num);
|
||||
if (input_tensors_.size() > 2) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <type_traits>
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "nnacl/fp32/arithmetic_fp32.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/log.h"
|
||||
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
|
@ -245,8 +246,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
|
||||
int task_id = 0;
|
||||
void ArithmeticFP32Coder::ComputeInOutStrides() {
|
||||
if (arithmetic_parameter_->broadcasting_) {
|
||||
outside_ = 1;
|
||||
for (auto i = arithmetic_parameter_->ndim_ - 1; i >= 0; --i) {
|
||||
|
@ -263,11 +263,15 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
|
|||
ComputeStrides(arithmetic_parameter_->out_shape_, arithmetic_parameter_->out_strides_,
|
||||
arithmetic_parameter_->ndim_);
|
||||
}
|
||||
}
|
||||
|
||||
int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
|
||||
ComputeInOutStrides();
|
||||
|
||||
int element_num = output_tensor_->ElementsNum();
|
||||
MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
|
||||
int stride = UP_DIV(element_num, thread_num_);
|
||||
int count = MSMIN(stride, element_num - stride * task_id);
|
||||
int count = MSMIN(stride, element_num - stride * kDefaultTaskId);
|
||||
MS_CHECK_TRUE(!arithmetic_run_.empty(), "arithmetic_run function is nullptr!");
|
||||
NNaclFp32Serializer code;
|
||||
/**
|
||||
|
@ -275,22 +279,55 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
|
|||
* this solution is not suitable for micro, for the size of package.
|
||||
* */
|
||||
if (arithmetic_opt_run_ == "ElementOptSub" || arithmetic_run_ == "ElementSub") {
|
||||
Collect(context, {"nnacl/fp32/sub_fp32.h"}, {"sub_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/sub_fp32.h",
|
||||
},
|
||||
{
|
||||
"sub_fp32.c",
|
||||
});
|
||||
} else if (arithmetic_opt_run_ == "ElementOptAdd" || arithmetic_run_ == "ElementAdd") {
|
||||
Collect(context, {"nnacl/fp32/add_fp32.h"}, {"add_fp32.c", "arithmetic_fp32.c", "arithmetic_base.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/add_fp32.h",
|
||||
},
|
||||
{
|
||||
"add_fp32.c",
|
||||
"arithmetic_fp32.c",
|
||||
"arithmetic_base.c",
|
||||
});
|
||||
} else if (arithmetic_opt_run_ == "ElementOptMul" || arithmetic_run_ == "ElementMul") {
|
||||
Collect(context, {"nnacl/fp32/mul_fp32.h"}, {"mul_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/mul_fp32.h",
|
||||
},
|
||||
{
|
||||
"mul_fp32.c",
|
||||
});
|
||||
} else if (arithmetic_run_ == "ElementAddRelu") {
|
||||
Collect(context, {"nnacl/fp32/add_relu_fp32.h"}, {"add_relu_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/add_relu_fp32.h",
|
||||
},
|
||||
{
|
||||
"add_relu_fp32.c",
|
||||
});
|
||||
} else {
|
||||
Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic_fp32.h"},
|
||||
{"arithmetic_common.c", "arithmetic_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/arithmetic_common.h",
|
||||
"nnacl/fp32/arithmetic_fp32.h",
|
||||
},
|
||||
{
|
||||
"arithmetic_common.c",
|
||||
"arithmetic_fp32.c",
|
||||
});
|
||||
}
|
||||
|
||||
if (arithmetic_parameter_->broadcasting_) {
|
||||
stride = UP_DIV(outside_, thread_num_);
|
||||
out_count_ = MSMIN(stride, outside_ - stride * task_id);
|
||||
out_thread_stride_ = stride * task_id;
|
||||
out_count_ = MSMIN(stride, outside_ - stride * kDefaultTaskId);
|
||||
out_thread_stride_ = stride * kDefaultTaskId;
|
||||
std::string input0_str = allocator_->GetRuntimeAddr(input_tensor_);
|
||||
std::string input1_str = allocator_->GetRuntimeAddr(filter_tensor_);
|
||||
std::string output_str = allocator_->GetRuntimeAddr(output_tensor_);
|
||||
|
|
|
@ -80,6 +80,8 @@ class ArithmeticFP32Coder final : public OperatorCoder {
|
|||
private:
|
||||
int Init(CoderContext *const context);
|
||||
|
||||
void ComputeInOutStrides();
|
||||
|
||||
int BroadcastRun(const std::string &input0, const std::string &input1, const std::string &output, int dim,
|
||||
int out_count, int out_thread_stride, NNaclFp32Serializer *const code);
|
||||
|
||||
|
|
|
@ -66,7 +66,14 @@ int ArithmeticSelfFP32Coder::DoCode(CoderContext *const context) {
|
|||
|
||||
MS_CHECK_TRUE(!arithmetic_self_run_.empty(), "arithmetic_run function is nullptr!");
|
||||
|
||||
Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic_self.h"}, {"nnacl/fp32/arithmetic_self.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/arithmetic_common.h",
|
||||
"nnacl/fp32/arithmetic_self.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/arithmetic_self.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeFunction(arithmetic_self_run_, input_tensor_, output_tensor_, size);
|
||||
|
||||
|
|
|
@ -54,7 +54,13 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) {
|
|||
MS_CHECK_TRUE(input_tensors_.size() == 3, "inputs size is not equal to three");
|
||||
Tensor *mean_tensor = input_tensors_.at(1);
|
||||
Tensor *var_tensor = input_tensors_.at(2);
|
||||
Collect(context, {"nnacl/fp32/batchnorm.h"}, {"nnacl/fp32/batchnorm.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/batchnorm.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/batchnorm.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeStruct("bn_parameter", *bn_parameter);
|
||||
code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", task_id, output_tensor_);
|
||||
|
|
|
@ -38,9 +38,19 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) {
|
|||
size_t data_size = input_tensor_->ElementsNum();
|
||||
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true);
|
||||
Collect(ctx,
|
||||
{"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h",
|
||||
"nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"},
|
||||
{"arithmetic_base.c", "arithmetic_fp32.c", "add_fp32.c"});
|
||||
{
|
||||
"nnacl/arithmetic.h",
|
||||
"nnacl/nnacl_utils.h",
|
||||
"nnacl/nnacl_common.h",
|
||||
"nnacl/base/arithmetic_base.h",
|
||||
"nnacl/fp32/add_fp32.h",
|
||||
"nnacl/fp32/arithmetic_fp32.h",
|
||||
},
|
||||
{
|
||||
"arithmetic_base.c",
|
||||
"arithmetic_fp32.c",
|
||||
"add_fp32.c",
|
||||
});
|
||||
nnacl::NNaclFp32Serializer code;
|
||||
std::vector<int> dims = input_tensor_->shape();
|
||||
arithmetic_parameter_->broadcasting_ = false;
|
||||
|
|
|
@ -35,7 +35,13 @@ int ConcatFP32Coder::ReSize() {
|
|||
}
|
||||
|
||||
int ConcatFP32Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"nnacl/fp32/concat.h"}, {"nnacl/fp32/concat.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/concat.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/concat.c",
|
||||
});
|
||||
|
||||
size_t input_num = input_tensors_.size();
|
||||
|
||||
|
|
|
@ -62,7 +62,16 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) {
|
|||
MS_CHECK_TRUE(conv_param_->input_channel_ == conv_param_->output_channel_,
|
||||
"Only support input channel equals output channel.");
|
||||
// generate code .h .c
|
||||
Collect(context, {"nnacl/fp32/conv_depthwise_fp32.h"}, {"conv_depthwise_fp32.c"}, {"ConvDwFp32Row.S"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/conv_depthwise_fp32.h",
|
||||
},
|
||||
{
|
||||
"conv_depthwise_fp32.c",
|
||||
},
|
||||
{
|
||||
"ConvDwFp32Row.S",
|
||||
});
|
||||
|
||||
nnacl::NNaclFp32Serializer code;
|
||||
// call the op function
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
|
||||
#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
|
||||
#include "nnacl/fp32/winograd_utils.h"
|
||||
#include "src/ops/populate/populate_register.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/log.h"
|
||||
#include "src/common/prim_util.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "src/common/version_manager.h"
|
||||
#include "coder/opcoders/nnacl/dequant/de_quant.h"
|
||||
|
||||
|
@ -109,37 +107,60 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int ConvolutionFP32Coder::DoCode(CoderContext *const context) {
|
||||
{
|
||||
std::vector<std::string> asmFiles;
|
||||
if (target_ == kARM32A) {
|
||||
asmFiles = {"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"IndirectGemmInt16to32_8x4.S",
|
||||
"MatmulInt8.S",
|
||||
"MatmulFp32Opt12x4.S"};
|
||||
} else if (target_ == kARM64) {
|
||||
asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S",
|
||||
"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"};
|
||||
}
|
||||
std::vector<std::string> h_files = {"nnacl/fp32/conv_common_fp32.h", "nnacl/fp32/matmul_fp32.h",
|
||||
"nnacl/conv_parameter.h", "nnacl/op_base.h"};
|
||||
std::vector<std::string> c_files = {"common_func.c", "conv_common_fp32.c", "matmul_fp32.c", "pack_fp32.c"};
|
||||
if (de_quant_flag_) {
|
||||
h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h");
|
||||
c_files.emplace_back("dequant_int8_to_fp32_wrapper.c");
|
||||
}
|
||||
Collect(context, h_files, c_files, asmFiles);
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/conv_common_fp32.h",
|
||||
"nnacl/fp32/matmul_fp32.h",
|
||||
"nnacl/conv_parameter.h",
|
||||
"nnacl/op_base.h",
|
||||
},
|
||||
{
|
||||
"common_func.c",
|
||||
"conv_common_fp32.c",
|
||||
"matmul_fp32.c",
|
||||
"pack_fp32.c",
|
||||
});
|
||||
if (de_quant_flag_) {
|
||||
Collect(context,
|
||||
{
|
||||
"wrapper/fp32/dequant_int8_to_fp32_wrapper.h",
|
||||
},
|
||||
{
|
||||
"dequant_int8_to_fp32_wrapper.c",
|
||||
});
|
||||
}
|
||||
if (target_ == kARM32A) {
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"IndirectGemmInt16to32_8x4.S",
|
||||
"MatmulInt8.S",
|
||||
"MatmulFp32Opt12x4.S",
|
||||
});
|
||||
} else if (target_ == kARM64) {
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"MatVecMulFp32.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"IndirectGemmInt16to32_8x4.S",
|
||||
"MatmulInt8.S",
|
||||
});
|
||||
}
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
// call the op function
|
||||
code.CodeFunction("memset", packed_input_, "0", packed_input_size_);
|
||||
code.CodeFunction("memset", col_major_input_, "0", col_major_input_size_);
|
||||
code.CodeStruct("conv_parameter", *conv_param_);
|
||||
int task_id = 0;
|
||||
code.CodeFunction("ConvFp32", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_,
|
||||
output_tensor_, task_id, "(ConvParameter *)&conv_parameter");
|
||||
output_tensor_, kDefaultTaskId, "&conv_parameter");
|
||||
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <array>
|
||||
#include "nnacl/base/minimal_filtering_generator.h"
|
||||
#include "coder/log.h"
|
||||
#include "coder/opcoders/parallel.h"
|
||||
#include "coder/opcoders/file_collector.h"
|
||||
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
|
||||
|
||||
|
@ -213,20 +214,46 @@ std::string ConvolutionWinogradFP32Coder::GetOutputTransFunc(int input_unit, int
|
|||
}
|
||||
|
||||
int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) {
|
||||
std::vector<std::string> asmFiles;
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/conv_winograd_fp32.h",
|
||||
"nnacl/common_func.h",
|
||||
},
|
||||
{
|
||||
"common_func.c",
|
||||
"conv_int8.c",
|
||||
"matmul_int8.c",
|
||||
"pack_fp32.c",
|
||||
"conv_winograd_fp32.c",
|
||||
"winograd_transform.c",
|
||||
"common_func_fp32.c",
|
||||
"fixed_point.c",
|
||||
"winograd_utils.c",
|
||||
"minimal_filtering_generator.c",
|
||||
});
|
||||
if (target_ == kARM32A) {
|
||||
asmFiles = {
|
||||
"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S",
|
||||
"MatmulInt8.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"IndirectGemmInt16to32_8x4.S",
|
||||
"MatmulInt8.S",
|
||||
});
|
||||
} else if (target_ == kARM64) {
|
||||
asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S",
|
||||
"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"MatVecMulFp32.S",
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"IndirectGemmInt16to32_8x4.S",
|
||||
"MatmulInt8.S",
|
||||
});
|
||||
}
|
||||
Collect(
|
||||
context, {"nnacl/fp32/conv_winograd_fp32.h", "nnacl/common_func.h"},
|
||||
{"common_func.c", "conv_int8.c", "matmul_int8.c", "pack_fp32.c", "conv_winograd_fp32.c", "winograd_transform.c",
|
||||
"common_func_fp32.c", "fixed_point.c", "winograd_utils.c", "minimal_filtering_generator.c"},
|
||||
asmFiles);
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
// call the op function
|
||||
|
@ -239,9 +266,8 @@ int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) {
|
|||
<< allocator_->GetRuntimeAddr(col_buffer_) << "};\n";
|
||||
code.CodeStruct("conv_parameter", *conv_param_);
|
||||
// code operator func
|
||||
int task_id = 0;
|
||||
code.CodeFunction("ConvWinogardFp32", input_tensor_, trans_weight_, new_bias_, output_tensor_,
|
||||
"tmp_buffer_address_list", task_id, "&conv_parameter", in_func_, out_func_);
|
||||
"tmp_buffer_address_list", kDefaultTaskId, "&conv_parameter", in_func_, out_func_);
|
||||
context->AppendCode(code.str());
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,13 @@ int GatherFP32Coder::DoCode(CoderContext *context) {
|
|||
Tensor *input1 = input_tensors_.at(1);
|
||||
|
||||
// generate code .h .c
|
||||
Collect(context, {"nnacl/fp32/gather.h"}, {"nnacl/fp32/gather.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/gather.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/gather.c",
|
||||
});
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
std::vector<int> in_shape = input0->shape();
|
||||
|
|
|
@ -124,19 +124,39 @@ int MatMulFP32BaseCoder::Prepare(CoderContext *const context) { return RET_OK; }
|
|||
|
||||
int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
|
||||
// generate code .h .c
|
||||
std::vector<std::string> asm_files;
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/matmul_fp32.h",
|
||||
"wrapper/fp32/matmul_fp32_wrapper.h",
|
||||
},
|
||||
{
|
||||
"matmul_fp32.c",
|
||||
"matmul_fp32_wrapper.c",
|
||||
});
|
||||
if (target_ == kARM32A) {
|
||||
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatmulFp32Opt12x4.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"MatmulFp32Opt12x4.S",
|
||||
});
|
||||
} else if (target_ == kARM64) {
|
||||
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatVecMulFp32.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"MatmulFp32.S",
|
||||
"MatmulFp32Opt.S",
|
||||
"MatVecMulFp32.S",
|
||||
});
|
||||
}
|
||||
std::vector<std::string> h_files = {"nnacl/fp32/matmul_fp32.h", "wrapper/fp32/matmul_fp32_wrapper.h"};
|
||||
std::vector<std::string> c_files = {"matmul_fp32.c", "matmul_fp32_wrapper.c"};
|
||||
if (de_quant_flag_) {
|
||||
h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h");
|
||||
c_files.emplace_back("dequant_int8_to_fp32_wrapper.c");
|
||||
Collect(context,
|
||||
{
|
||||
"wrapper/fp32/dequant_int8_to_fp32_wrapper.h",
|
||||
},
|
||||
{
|
||||
"dequant_int8_to_fp32_wrapper.c",
|
||||
});
|
||||
}
|
||||
Collect(context, h_files, c_files, asm_files);
|
||||
NNaclFp32Serializer code;
|
||||
NNaclFp32Serializer init_code;
|
||||
code.CodeStruct("mat_mul_parameter", *params_);
|
||||
|
|
|
@ -27,7 +27,13 @@ int Nchw2NhwcFP32Coder::Prepare(CoderContext *const context) { return RET_OK; }
|
|||
|
||||
int Nchw2NhwcFP32Coder::DoCode(CoderContext *context) {
|
||||
// generate code .h .c
|
||||
Collect(context, {"nnacl/pack.h"}, {"nnacl/pack.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/pack.h",
|
||||
},
|
||||
{
|
||||
"nnacl/pack.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
if (input_tensor_->shape().size() == 4) {
|
||||
if (input_tensor_->data_type() == kNumberTypeFloat32) {
|
||||
|
|
|
@ -25,7 +25,13 @@ int Nhwc2NchwFP32Coder::Prepare(CoderContext *const context) { return RET_OK; }
|
|||
|
||||
int Nhwc2NchwFP32Coder::DoCode(CoderContext *const context) {
|
||||
// generate code .h .c
|
||||
Collect(context, {"nnacl/pack.h"}, {"pack.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/pack.h",
|
||||
},
|
||||
{
|
||||
"pack.c",
|
||||
});
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
if (input_tensor_->shape().size() == 4) {
|
||||
|
|
|
@ -80,7 +80,14 @@ int PadFP32Coder::ExtendPaddings(int *paddings, int length, const int *ori_paddi
|
|||
|
||||
int PadFP32Coder::DoCode(CoderContext *const context) {
|
||||
int task_id = thread_num_ - 1;
|
||||
Collect(context, {"nnacl/fp32/pad.h", "nnacl/pad_parameter.h"}, {"nnacl/fp32/pad.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/pad.h",
|
||||
"nnacl/pad_parameter.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/pad.c",
|
||||
});
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeArray("in_", in_, DEFAULT_PAD_NDIMS);
|
||||
|
|
|
@ -47,7 +47,13 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
|
|||
float minf = -FLT_MAX;
|
||||
float maxf = FLT_MAX;
|
||||
if (pooling_parameter->pool_mode_ == PoolMode_MaxPool) {
|
||||
Collect(context, {"nnacl/fp32/pooling_fp32.h"}, {"pooling_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/pooling_fp32.h",
|
||||
},
|
||||
{
|
||||
"pooling_fp32.c",
|
||||
});
|
||||
switch (pooling_parameter->act_type_) {
|
||||
case ActType_Relu: {
|
||||
minf = 0.f;
|
||||
|
@ -66,7 +72,13 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
|
|||
|
||||
code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
|
||||
} else {
|
||||
Collect(context, {"nnacl/fp32/pooling_fp32.h"}, {"pooling_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/pooling_fp32.h",
|
||||
},
|
||||
{
|
||||
"pooling_fp32.c",
|
||||
});
|
||||
switch (pooling_parameter->act_type_) {
|
||||
case ActType_Relu: {
|
||||
minf = 0.f;
|
||||
|
|
|
@ -48,7 +48,13 @@ int PowerFP32Coder::DoCode(CoderContext *const context) {
|
|||
cur_exp_str = exp_addr;
|
||||
}
|
||||
// generate code .h .c
|
||||
Collect(context, {"nnacl/power.h"}, {"power.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/power.h",
|
||||
},
|
||||
{
|
||||
"power.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeFunction("Power", input_tensor_, cur_exp_str, output_tensor_, len, scale_, shift_, broadcast);
|
||||
context->AppendCode(code.str());
|
||||
|
|
|
@ -52,11 +52,29 @@ int ReduceFP32Coder::ReSize() {
|
|||
int ReduceFP32Coder::DoCode(CoderContext *const context) {
|
||||
// generate code .h .c
|
||||
if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSum)) {
|
||||
Collect(context, {"runtime/kernel/fp32/reduce_sum.h"}, {"reduce_sum.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"runtime/kernel/fp32/reduce_sum.h",
|
||||
},
|
||||
{
|
||||
"reduce_sum.c",
|
||||
});
|
||||
} else if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
|
||||
Collect(context, {"runtime/kernel/fp32/reduce_mean.h"}, {"reduce_mean.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"runtime/kernel/fp32/reduce_mean.h",
|
||||
},
|
||||
{
|
||||
"reduce_mean.c",
|
||||
});
|
||||
} else {
|
||||
Collect(context, {"runtime/kernel/fp32/reduce.h"}, {"reduce.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"runtime/kernel/fp32/reduce.h",
|
||||
},
|
||||
{
|
||||
"reduce.c",
|
||||
});
|
||||
}
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
|
|
|
@ -127,7 +127,15 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) {
|
|||
Tensor *offset_tensor = input_tensors_.at(kBiasIndex);
|
||||
MS_CHECK_PTR(scale_tensor);
|
||||
MS_CHECK_PTR(offset_tensor);
|
||||
Collect(context, {"nnacl/scale.h", "nnacl/fp32/scale.h", "nnacl/quantization/quantize.h"}, {"scale.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/scale.h",
|
||||
"nnacl/fp32/scale.h",
|
||||
"nnacl/quantization/quantize.h",
|
||||
},
|
||||
{
|
||||
"scale.c",
|
||||
});
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeStruct("scale_parameter", *scale_param_);
|
||||
|
|
|
@ -48,7 +48,14 @@ int SoftMaxFP32Coder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int SoftMaxFP32Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"nnacl/fp32/softmax_fp32.h"}, {"softmax_fp32.c", "exp_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/softmax_fp32.h",
|
||||
},
|
||||
{
|
||||
"softmax_fp32.c",
|
||||
"exp_fp32.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeStruct("softmax_parameter", *softmax_param_);
|
||||
code.CodeFunction("memset", sum_data_, "0", sum_data_size_);
|
||||
|
|
|
@ -40,7 +40,14 @@ int SpliceFP32Coder::DoCode(CoderContext *const context) {
|
|||
MS_LOG(ERROR) << "SpliceFP32Coder src_col not match to dst_col";
|
||||
return RET_ERROR;
|
||||
}
|
||||
Collect(context, {"nnacl/splice_parameter.h", "nnacl/fp32/splice_fp32.h"}, {"splice_fp32.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/splice_parameter.h",
|
||||
"nnacl/fp32/splice_fp32.h",
|
||||
},
|
||||
{
|
||||
"splice_fp32.c",
|
||||
});
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeStruct("splice_parameter", *splice_parameter);
|
||||
code.CodeFunction("SpliceFp32", input_tensor_, src_row, src_col, "&splice_parameter", output_tensor_, dst_row,
|
||||
|
|
|
@ -51,7 +51,13 @@ int TileFP32Coder::Prepare(CoderContext *const context) { return Resize(); }
|
|||
|
||||
int TileFP32Coder::DoCode(CoderContext *const context) {
|
||||
// generate code .h .c
|
||||
Collect(context, {"nnacl/fp32/tile.h"}, {"nnacl/fp32/tile.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/fp32/tile.h",
|
||||
},
|
||||
{
|
||||
"nnacl/fp32/tile.c",
|
||||
});
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
|
||||
|
|
|
@ -78,7 +78,15 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
Collect(context, {"nnacl/transpose.h", "nnacl/fp32/transpose.h", "nnacl/errorcode.h"}, {"transpose.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/transpose.h",
|
||||
"nnacl/fp32/transpose.h",
|
||||
"nnacl/errorcode.h",
|
||||
},
|
||||
{
|
||||
"transpose.c",
|
||||
});
|
||||
|
||||
NNaclFp32Serializer code;
|
||||
code.CodeStruct("transpose_parameter", *transpose_parameter_);
|
||||
|
|
|
@ -136,8 +136,16 @@ int AddInt8Coder::ReSize() {
|
|||
}
|
||||
|
||||
int AddInt8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"wrapper/int8/add_int8_wrapper.h"},
|
||||
{"add_int8_wrapper.c", "add_int8.c", "arithmetic_base.c", "arithmetic_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"wrapper/int8/add_int8_wrapper.h",
|
||||
},
|
||||
{
|
||||
"add_int8_wrapper.c",
|
||||
"add_int8.c",
|
||||
"arithmetic_base.c",
|
||||
"arithmetic_int8.c",
|
||||
});
|
||||
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
|
||||
|
|
|
@ -49,14 +49,18 @@ int BatchNormInt8Coder::Prepare(CoderContext *const context) {
|
|||
return RET_OK;
|
||||
}
|
||||
int BatchNormInt8Coder::DoCode(CoderContext *context) {
|
||||
std::vector<std::string> headers = {"nnacl/slice_parameter.h"};
|
||||
std::vector<std::string> cFiles = {"batchnorm_int8.c"};
|
||||
NNaclInt8Serializer code;
|
||||
|
||||
code.CodeStruct("param", *batchnorm_param_);
|
||||
code.CodeFunction("BatchNormInt8", output_tensor_, input_tensor_, alpha_addr_, beta_addr_, kDefaultTaskId, "&param");
|
||||
|
||||
Collect(context, headers, cFiles);
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/slice_parameter.h",
|
||||
},
|
||||
{
|
||||
"batchnorm_int8.c",
|
||||
});
|
||||
context->AppendCode(code.str());
|
||||
|
||||
return RET_OK;
|
||||
|
|
|
@ -90,8 +90,15 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) {
|
|||
count_unit_ = thread_num_ > 1 ? UP_DIV(before_axis_size, thread_num_) : before_axis_size;
|
||||
concat_param_->count_unit_ = count_unit_;
|
||||
|
||||
Collect(context, {"nnacl/int8/concat_int8.h", "wrapper/int8/concat_int8_wrapper.h"},
|
||||
{"concat_int8.c", "concat_int8_wrapper.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/concat_int8.h",
|
||||
"wrapper/int8/concat_int8_wrapper.h",
|
||||
},
|
||||
{
|
||||
"concat_int8.c",
|
||||
"concat_int8_wrapper.c",
|
||||
});
|
||||
NNaclInt8Serializer code;
|
||||
|
||||
int in_tensor_count = input_tensors().size();
|
||||
|
|
|
@ -44,12 +44,29 @@ int Conv2D1x1Int8Coder::Prepare(CoderContext *const context) {
|
|||
|
||||
int Conv2D1x1Int8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context,
|
||||
{"wrapper/int8/conv1x1_init_int8_wrapper.h", "wrapper/int8/conv1x1_run_int8_wrapper.h", "nnacl/common_func.h",
|
||||
"nnacl/base/conv1x1_base.h", "nnacl/int8/matmul_int8.h", "nnacl/int8/pack_int8.h",
|
||||
"nnacl/int8/conv1x1_int8.h", "nnacl/errorcode.h"},
|
||||
{"common_func.c", "pack_int8.c", "conv1x1_int8.c", "matmul_int8.c", "fixed_point.c",
|
||||
"conv1x1_init_int8_wrapper.c", "conv1x1_run_int8_wrapper.c", "conv1x1_base.c"},
|
||||
{"MatmulInt8Opt.S"});
|
||||
{
|
||||
"wrapper/int8/conv1x1_init_int8_wrapper.h",
|
||||
"wrapper/int8/conv1x1_run_int8_wrapper.h",
|
||||
"nnacl/common_func.h",
|
||||
"nnacl/base/conv1x1_base.h",
|
||||
"nnacl/int8/matmul_int8.h",
|
||||
"nnacl/int8/pack_int8.h",
|
||||
"nnacl/int8/conv1x1_int8.h",
|
||||
"nnacl/errorcode.h",
|
||||
},
|
||||
{
|
||||
"common_func.c",
|
||||
"pack_int8.c",
|
||||
"conv1x1_int8.c",
|
||||
"matmul_int8.c",
|
||||
"fixed_point.c",
|
||||
"conv1x1_init_int8_wrapper.c",
|
||||
"conv1x1_run_int8_wrapper.c",
|
||||
"conv1x1_base.c",
|
||||
},
|
||||
{
|
||||
"MatmulInt8Opt.S",
|
||||
});
|
||||
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
|
||||
|
|
|
@ -127,8 +127,17 @@ int Conv2D3x3Int8Coder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int Conv2D3x3Int8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"nnacl/int8/conv_int8.h", "nnacl/int8/conv3x3_int8.h"},
|
||||
{"pack_int8.c", "conv_int8.c", "conv3x3_int8.c", "fixed_point.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/conv_int8.h",
|
||||
"nnacl/int8/conv3x3_int8.h",
|
||||
},
|
||||
{
|
||||
"pack_int8.c",
|
||||
"conv_int8.c",
|
||||
"conv3x3_int8.c",
|
||||
"fixed_point.c",
|
||||
});
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
code.precision(kPrecision);
|
||||
// call the op function
|
||||
|
|
|
@ -182,19 +182,43 @@ int Conv2DINT8Coder::Resize() {
|
|||
}
|
||||
|
||||
int Conv2DINT8Coder::DoCode(CoderContext *const context) {
|
||||
std::vector<std::string> asm_files;
|
||||
if (target_ == kARM32A) {
|
||||
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"MatmulInt8.S",
|
||||
});
|
||||
} else if (target_ == kARM64) {
|
||||
asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S", "MatmulDpInt8.S"};
|
||||
Collect(context, {}, {},
|
||||
{
|
||||
"PreSum4x16Int8Peroc.S",
|
||||
"PreSum4x16Int8Pert.S",
|
||||
"MatmulInt8.S",
|
||||
"MatmulDpInt8.S",
|
||||
});
|
||||
}
|
||||
Collect(context,
|
||||
{"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h",
|
||||
"wrapper/int8/conv_init_int8_wrapper.h", "wrapper/base/common_wrapper.h",
|
||||
"wrapper/base/optimize_handler_wrapper.h"},
|
||||
{"common_func.c", "pack_int8.c", "conv_int8.c", "winograd_transform.c", "matmul_int8.c", "fixed_point.c",
|
||||
"convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "common_wrapper.c", "optimize_handler_wrapper.c"},
|
||||
asm_files);
|
||||
{
|
||||
"nnacl/int8/conv_int8.h",
|
||||
"nnacl/common_func.h",
|
||||
"wrapper/int8/convolution_int8_wrapper.h",
|
||||
"wrapper/base/common_wrapper.h",
|
||||
"wrapper/base/optimize_handler_wrapper.h",
|
||||
"wrapper/int8/conv_init_int8_wrapper.h",
|
||||
},
|
||||
{
|
||||
"common_func.c",
|
||||
"pack_int8.c",
|
||||
"conv_int8.c",
|
||||
"winograd_transform.c",
|
||||
"matmul_int8.c",
|
||||
"fixed_point.c",
|
||||
"convolution_int8_wrapper.c",
|
||||
"conv_init_int8_wrapper.c",
|
||||
"common_wrapper.c",
|
||||
"optimize_handler_wrapper.c",
|
||||
});
|
||||
// call the op function
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
|
|
@ -81,17 +81,39 @@ int ConvolutionDepthwiseINT8Coder::InitWeightBias(CoderContext *const context) {
|
|||
int ConvolutionDepthwiseINT8Coder::DoCode(CoderContext *const context) {
|
||||
MS_CHECK_TRUE(conv_param_->input_channel_ == conv_param_->output_channel_,
|
||||
"Only support input channel equals output channel.");
|
||||
Collect(
|
||||
context,
|
||||
{"nnacl/int8/conv_depthwise_int8.h", "nnacl/int8/pack_int8.h", "wrapper/int8/convolution_depthwise_int8_wrapper.h"},
|
||||
{"conv_depthwise_int8.c", "fixed_point.c", "pack_int8.c", "conv_int8.c", "winograd_transform.c",
|
||||
"convolution_depthwise_int8_wrapper.c"},
|
||||
{"ConvDwInt8Row.S", "ConvDwInt8PostAlign4.S", "ConvDwInt8PostAlign4PerChannel.S", "ConvDwInt8Center.S",
|
||||
"DeconvDwInt8Center.S", "DeconvDwInt8Post.S"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/conv_depthwise_int8.h",
|
||||
"nnacl/int8/pack_int8.h",
|
||||
"wrapper/int8/convolution_depthwise_int8_wrapper.h",
|
||||
},
|
||||
{
|
||||
"conv_depthwise_int8.c",
|
||||
"fixed_point.c",
|
||||
"pack_int8.c",
|
||||
"conv_int8.c",
|
||||
"winograd_transform.c",
|
||||
"convolution_depthwise_int8_wrapper.c",
|
||||
},
|
||||
{
|
||||
"ConvDwInt8Row.S",
|
||||
"ConvDwInt8PostAlign4.S",
|
||||
"ConvDwInt8PostAlign4PerChannel.S",
|
||||
"ConvDwInt8Center.S",
|
||||
"DeconvDwInt8Center.S",
|
||||
"DeconvDwInt8Post.S",
|
||||
});
|
||||
if (target_ == kARM64) {
|
||||
Collect(context, {}, {},
|
||||
{"ConvDw3x3Int8.S", "ConvDw3x3Int8Corner.S", "ConvDw3x3Int8Horizontal.S", "ConvDw3x3Int8Stride2.S",
|
||||
"ConvDw3x3Int8Vertical.S", "MatmulDpInt8Opt.S", "MatmulOptR4Int8.S"});
|
||||
{
|
||||
"ConvDw3x3Int8.S",
|
||||
"ConvDw3x3Int8Corner.S",
|
||||
"ConvDw3x3Int8Horizontal.S",
|
||||
"ConvDw3x3Int8Stride2.S",
|
||||
"ConvDw3x3Int8Vertical.S",
|
||||
"MatmulDpInt8Opt.S",
|
||||
"MatmulOptR4Int8.S",
|
||||
});
|
||||
}
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
|
|
@ -123,7 +123,15 @@ int DeconvolutionInt8Coder::InitRunBuf(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int DeconvolutionInt8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"nnacl/int8/deconv.h"}, {"int8/deconv.c", "pack_int8.c", "quantization/fixed_point.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/deconv.h",
|
||||
},
|
||||
{
|
||||
"int8/deconv.c",
|
||||
"pack_int8.c",
|
||||
"quantization/fixed_point.c",
|
||||
});
|
||||
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
code.CodeFunction("memset", input_ptr_, 0, input_ptr_size_);
|
||||
|
|
|
@ -43,7 +43,13 @@ int DetectionPostProcessInt8Coder::GetInputData(CoderContext *const context, Ser
|
|||
MS_CHECK_TRUE(boxes->data_type() == kNumberTypeInt8, "Input data type error");
|
||||
MS_CHECK_TRUE(scores->data_type() == kNumberTypeInt8, "Input data type error");
|
||||
|
||||
Collect(context, {"nnacl/int8/quant_dtype_cast_int8.h"}, {"quant_dtype_cast_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/quant_dtype_cast_int8.h",
|
||||
},
|
||||
{
|
||||
"quant_dtype_cast_int8.c",
|
||||
});
|
||||
code->CodeFunction("DoDequantizeInt8ToFp32", boxes, input_boxes_, boxes_quant_param.scale,
|
||||
boxes_quant_param.zeroPoint, boxes->ElementsNum());
|
||||
code->CodeFunction("DoDequantizeInt8ToFp32", scores, input_scores_, scores_quant_param.scale,
|
||||
|
|
|
@ -168,9 +168,18 @@ int MatMulBaseInt8Coder::Prepare(CoderContext *const context) { return RET_OK; }
|
|||
|
||||
int MatMulBaseInt8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context,
|
||||
{"nnacl/common_func.h", "nnacl/int8/common_func_int8.h", "nnacl/int8/matmul_int8.h",
|
||||
"wrapper/int8/matmul_int8_wrapper.h"},
|
||||
{"common_func.c", "common_func_int8.c", "matmul_int8.c", "matmul_int8_wrapper.c"});
|
||||
{
|
||||
"nnacl/common_func.h",
|
||||
"nnacl/int8/common_func_int8.h",
|
||||
"nnacl/int8/matmul_int8.h",
|
||||
"wrapper/int8/matmul_int8_wrapper.h",
|
||||
},
|
||||
{
|
||||
"common_func.c",
|
||||
"common_func_int8.c",
|
||||
"matmul_int8.c",
|
||||
"matmul_int8_wrapper.c",
|
||||
});
|
||||
std::string value_str_end = ";\n";
|
||||
NNaclInt8Serializer init_code;
|
||||
NNaclInt8Serializer code;
|
||||
|
|
|
@ -47,7 +47,14 @@ int PoolingInt8Coder::DoCode(CoderContext *const context) {
|
|||
// get quant params
|
||||
std::vector<QuantArg> in_quant_args = in_tensor->quant_params();
|
||||
std::vector<QuantArg> out_quant_args = out_tensor->quant_params();
|
||||
Collect(context, {"nnacl/int8/pooling_int8.h", "nnacl/errorcode.h"}, {"pooling_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/pooling_int8.h",
|
||||
"nnacl/errorcode.h",
|
||||
},
|
||||
{
|
||||
"pooling_int8.c",
|
||||
});
|
||||
NNaclInt8Serializer code;
|
||||
code.precision(kPrecision);
|
||||
// code op parameter
|
||||
|
|
|
@ -192,7 +192,14 @@ int ReduceInt8Coder::DoCode(CoderContext *const context) {
|
|||
MS_LOG(DEBUG) << "*****Reduce code start*****";
|
||||
int task_id = 0;
|
||||
NNaclInt8Serializer code;
|
||||
Collect(context, {"nnacl/int8/reduce_int8.h"}, {"reduce_int8.c", "fixed_point.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/reduce_int8.h",
|
||||
},
|
||||
{
|
||||
"reduce_int8.c",
|
||||
"fixed_point.c",
|
||||
});
|
||||
std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_);
|
||||
std::string dst_addr;
|
||||
std::string begin_src_data_src = allocator_->GetRuntimeAddr(begin_src_data_);
|
||||
|
|
|
@ -40,7 +40,13 @@ int ReluxInt8Coder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int ReluxInt8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"nnacl/int8/relux_int8.h"}, {"relux_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/relux_int8.h",
|
||||
},
|
||||
{
|
||||
"relux_int8.c",
|
||||
});
|
||||
|
||||
NNaclInt8Serializer code;
|
||||
|
||||
|
|
|
@ -34,7 +34,13 @@ int ReshapeInt8Coder::DoCode(CoderContext *const context) {
|
|||
std::vector<QuantArg> input_quant_args = input->quant_params();
|
||||
std::vector<QuantArg> output_quant_args = output->quant_params();
|
||||
|
||||
Collect(context, {"nnacl/int8/reshape_int8.h"}, {"reshape_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/reshape_int8.h",
|
||||
},
|
||||
{
|
||||
"reshape_int8.c",
|
||||
});
|
||||
NNaclInt8Serializer code;
|
||||
code.precision(kPrecision);
|
||||
ReshapeQuantArg reshape_quant_arg = {
|
||||
|
|
|
@ -61,9 +61,16 @@ int ResizeInt8Coder::ReSize() {
|
|||
}
|
||||
|
||||
int ResizeInt8Coder::DoCode(CoderContext *const context) {
|
||||
std::vector<std::string> headers = {"nnacl/int8/resize_int8.h", "wrapper/int8/resize_int8_wrapper.h"};
|
||||
std::vector<std::string> cFiles = {"resize_int8.c", "common_func.c", "resize_int8_wrapper.c"};
|
||||
Collect(context, headers, cFiles);
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/resize_int8.h",
|
||||
"wrapper/int8/resize_int8_wrapper.h",
|
||||
},
|
||||
{
|
||||
"resize_int8.c",
|
||||
"common_func.c",
|
||||
"resize_int8_wrapper.c",
|
||||
});
|
||||
|
||||
nnacl::NNaclInt8Serializer code;
|
||||
code.CodeArray("input_shape", input_tensor_->shape().data(), input_tensor_->shape().size(), true);
|
||||
|
|
|
@ -59,7 +59,13 @@ int SigmodInt8Coder::Prepare(CoderContext *const context) {
|
|||
}
|
||||
|
||||
int SigmodInt8Coder::DoCode(CoderContext *const context) {
|
||||
Collect(context, {"nnacl/int8/sigmoid_int8.h"}, {"sigmoid_int8.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/sigmoid_int8.h",
|
||||
},
|
||||
{
|
||||
"sigmoid_int8.c",
|
||||
});
|
||||
|
||||
NNaclInt8Serializer code;
|
||||
|
||||
|
|
|
@ -69,7 +69,14 @@ int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
|
|||
outter_size *= softmax_param_->input_shape_[i];
|
||||
}
|
||||
MS_CHECK_TRUE(softmax_param_->n_dim_ < 5, "n_dim should be less than the length of maximum value of input_shape");
|
||||
Collect(context, {"nnacl/int8/softmax_int8.h"}, {"softmax_int8.c", "fixed_point.c"});
|
||||
Collect(context,
|
||||
{
|
||||
"nnacl/int8/softmax_int8.h",
|
||||
},
|
||||
{
|
||||
"softmax_int8.c",
|
||||
"fixed_point.c",
|
||||
});
|
||||
|
||||
NNaclInt8Serializer code;
|
||||
code.precision(kPrecision);
|
||||
|
|
|
@ -62,15 +62,15 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const SoftmaxParam
|
|||
|
||||
void NNaclFp32Serializer::CodeStruct(const std::string &name, const ConvParameter &conv_parameter) {
|
||||
code << "int thread_num = MSMIN(" << gThreadNum << ", " << conv_parameter.output_h_ << ");\n";
|
||||
CodeBaseStruct("ConvParameter", name, conv_parameter.op_parameter_, "{}", conv_parameter.kernel_h_,
|
||||
conv_parameter.kernel_w_, conv_parameter.stride_h_, conv_parameter.stride_w_,
|
||||
conv_parameter.dilation_h_, conv_parameter.dilation_w_, conv_parameter.pad_u_, conv_parameter.pad_d_,
|
||||
conv_parameter.pad_l_, conv_parameter.pad_r_, conv_parameter.group_, conv_parameter.tile_num_,
|
||||
conv_parameter.input_batch_, conv_parameter.input_h_, conv_parameter.input_w_,
|
||||
conv_parameter.input_channel_, conv_parameter.output_batch_, conv_parameter.output_h_,
|
||||
conv_parameter.output_w_, conv_parameter.output_channel_, "thread_num", conv_parameter.input_unit_,
|
||||
conv_parameter.output_unit_, conv_parameter.pad_mode_, conv_parameter.act_type_,
|
||||
conv_parameter.channel_multiplie_, conv_parameter.output_padding_w_, conv_parameter.output_padding_h_);
|
||||
CodeBaseStruct<false>(
|
||||
"ConvParameter", name, conv_parameter.op_parameter_, "{}", conv_parameter.kernel_h_, conv_parameter.kernel_w_,
|
||||
conv_parameter.stride_h_, conv_parameter.stride_w_, conv_parameter.dilation_h_, conv_parameter.dilation_w_,
|
||||
conv_parameter.pad_u_, conv_parameter.pad_d_, conv_parameter.pad_l_, conv_parameter.pad_r_, conv_parameter.group_,
|
||||
conv_parameter.tile_num_, conv_parameter.input_batch_, conv_parameter.input_h_, conv_parameter.input_w_,
|
||||
conv_parameter.input_channel_, conv_parameter.output_batch_, conv_parameter.output_h_, conv_parameter.output_w_,
|
||||
conv_parameter.output_channel_, "thread_num", conv_parameter.input_unit_, conv_parameter.output_unit_,
|
||||
conv_parameter.pad_mode_, conv_parameter.act_type_, conv_parameter.channel_multiplie_,
|
||||
conv_parameter.output_padding_w_, conv_parameter.output_padding_h_);
|
||||
}
|
||||
|
||||
void NNaclFp32Serializer::CodeStruct(const std::string &name, const MatMulParameter &mat_mul_parameter) {
|
||||
|
|
Loading…
Reference in New Issue