!7686 [MS][LITE][CPU] fix deconv depthwise int8 bug

Merge pull request !7686 from fuzhiye/tmp
mindspore-ci-bot 2020-10-23 17:33:13 +08:00 committed by Gitee
commit c0f3baf842
9 changed files with 21 additions and 60 deletions

View File

@@ -62,9 +62,9 @@ void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, in
#endif
}
void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
int kernel_h = conv_param->kernel_h_;
int kernel_w = conv_param->kernel_w_;
int in_batch = conv_param->input_batch_;
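The Opt suffix comes off ConvInt8Opt here (and in the header below): there is a single entry point now, and the optimized path is still selected at run time through the trailing matmul_func / is_optimize arguments. A minimal sketch of that dispatch shape, using hypothetical stand-in kernels rather than the NNACL implementations:

```cpp
#include <cstdint>

// Hypothetical stand-ins, not the NNACL kernels: what matters is the shape of
// the dispatch, where one entry point keeps the (matmul_func, is_optimize)
// pair and branches at run time.
using MatmulFunc = void (*)(const int8_t *a, const int8_t *b, int32_t *c, int n);

static void MatMulInt8Reference(const int8_t *a, const int8_t *b, int32_t *c, int n) {
  for (int i = 0; i < n; ++i) {
    c[i] = static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
  }
}

static void ConvInt8Dispatch(const int8_t *a, const int8_t *b, int32_t *c, int n,
                             MatmulFunc matmul_func, bool is_optimize) {
  if (is_optimize && matmul_func != nullptr) {
    matmul_func(a, b, c, n);          // caller-registered optimized kernel
  } else {
    MatMulInt8Reference(a, b, c, n);  // portable fallback
  }
}

int main() {
  const int8_t a[4] = {1, 2, 3, 4};
  const int8_t b[4] = {5, 6, 7, 8};
  int32_t c[4] = {0};
  ConvInt8Dispatch(a, b, c, 4, nullptr, false);  // takes the reference path
  return c[0] == 5 ? 0 : 1;
}
```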

View File

@@ -32,9 +32,9 @@
extern "C" {
#endif
// int8 conv common
void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize);
void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize);
// int8 convolution 1x1
void Conv1x1PreOptPeroc(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, size_t input_channel,

View File

@@ -141,34 +141,6 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
return RET_OK;
}
int ConvolutionBaseCPUKernel::SetIfAsymmetric() {
uint8_t asymmetric = 0b0;
auto filter_tensor = in_tensors_.at(kWeightIndex);
auto filter_ele_num = filter_tensor->ElementsNum();
auto filter_data = reinterpret_cast<int8_t *>(filter_tensor->MutableData());
int min_value = INT8_MAX;
int max_value = INT8_MIN;
for (int i = 0; i < filter_ele_num; ++i) {
min_value = min_value < filter_data[i] ? min_value : filter_data[i];
max_value = max_value > filter_data[i] ? max_value : filter_data[i];
}
if (conv_quant_arg_->filter_arg_num_ == kPerTensor) {
auto filter_zp = conv_quant_arg_->filter_quant_args_[0].zp_;
if (filter_zp != 0 && min_value >= -128 && max_value <= 127) {
asymmetric = asymmetric | FILTER_ASYMMETRIC;
}
} else {
auto filter_arg = conv_quant_arg_->filter_quant_args_;
for (int i = 0; i < conv_param_->output_channel_; ++i) {
if (filter_arg[i].zp_ != 0 && min_value >= -128 && max_value <= 127) {
asymmetric = asymmetric | FILTER_ASYMMETRIC;
}
}
}
conv_quant_arg_->asymmetric_ = asymmetric;
return RET_OK;
}
int ConvolutionBaseCPUKernel::MallocQuantParam() {
conv_quant_arg_ = &conv_param_->conv_quant_arg_;
auto input_tensor = in_tensors_.at(kInputIndex);
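SetIfAsymmetric() is deleted outright, along with its declaration in the header below. One observable property of the deleted code: the guard min_value >= -128 && max_value <= 127 is vacuously true for int8 filter data, so each branch collapsed to a bare zero-point test. A small compilable illustration of that reduction (commentary only, not kernel code):

```cpp
#include <cstdint>
#include <limits>

// The deleted scan computed min/max over int8 filter values and then required
// min >= -128 && max <= 127 -- which can never fail, since int8_t spans
// exactly [-128, 127]. Each branch therefore reduced to a zero-point check.
bool FilterIsAsymmetric(int32_t filter_zp) {
  static_assert(std::numeric_limits<int8_t>::min() == -128, "int8 lower bound");
  static_assert(std::numeric_limits<int8_t>::max() == 127, "int8 upper bound");
  return filter_zp != 0;
}
```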

View File

@@ -48,7 +48,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
int Init() override;
int ReSize() override { return 0; }
int Run() override { return 0; }
int SetIfAsymmetric();
int SetIfPerChannel();
int MallocQuantParam();
int SetQuantParam();

View File

@@ -156,14 +156,7 @@ int Convolution3x3Int8CPUKernel::InitTmpBuffer() {
return RET_OK;
}
void Convolution3x3Int8CPUKernel::ConfigInputOutput() {
auto output_tensor = out_tensors_.at(kOutputIndex);
output_tensor->SetFormat(schema::Format::Format_NHWC);
}
int Convolution3x3Int8CPUKernel::Init() {
// config input output
ConfigInputOutput();
auto ret = SetQuantParam();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set quant param failed.";

View File

@@ -38,7 +38,6 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
int RunImpl(int task_id);
int InitWeightBias();
int InitTmpBuffer();
void ConfigInputOutput();
private:
void FreeTmpBuffer();

View File

@@ -59,7 +59,7 @@ void ConvolutionInt8CPUKernel::CheckSupportOptimize() {
conv_param_->tile_num_ = tile_num_;
}
int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
int ConvolutionInt8CPUKernel::InitWeightBias() {
auto filter_tensor = in_tensors_.at(kWeightIndex);
auto input_channel = filter_tensor->Channel();
auto output_channel = filter_tensor->Batch();
@@ -83,7 +83,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
}
#endif
int pack_weight_size = up_round_oc * up_round_deep;
int bias_size = up_round_oc * sizeof(int32_t);
size_t bias_size = up_round_oc * sizeof(int32_t);
int32_t input_zp = conv_param_->conv_quant_arg_.input_quant_args_[0].zp_;
// init weight
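A small type fix rides along in this file: bias_size changes from int to size_t. The product up_round_oc * sizeof(int32_t) is computed in size_t, and it presumably feeds allocation and zeroing calls that take size_t, so the old int declaration narrowed it implicitly. A compilable illustration with a hypothetical value:

```cpp
#include <cstdint>
#include <cstdlib>
#include <cstring>

int main() {
  // sizeof() yields size_t, so the product below is computed as size_t;
  // storing it in an int (the old declaration) narrows implicitly, while
  // size_t matches what malloc() and memset() expect.
  size_t up_round_oc = 8;  // hypothetical packed output-channel count
  size_t bias_size = up_round_oc * sizeof(int32_t);
  void *bias = std::malloc(bias_size);
  if (bias == nullptr) {
    return 1;
  }
  std::memset(bias, 0, bias_size);
  std::free(bias);
  return 0;
}
```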
@@ -150,7 +150,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
return RET_OK;
}
int ConvolutionInt8CPUKernel::InitTmpBufferOpt() {
int ConvolutionInt8CPUKernel::InitTmpBuffer() {
MS_ASSERT(ctx_->allocator != nullptr);
int kernel_plane = conv_param_->kernel_h_ * conv_param_->kernel_w_;
int tmp_size;
@@ -181,7 +181,7 @@ int ConvolutionInt8CPUKernel::Init() {
return ret;
}
ret = InitWeightBiasOpt();
ret = InitWeightBias();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Initialization for optimized int8 conv failed.";
return RET_ERROR;
@@ -212,9 +212,8 @@ int ConvolutionInt8CPUKernel::RunImpl(int task_id) {
auto input_tensor = in_tensors_.at(kInputIndex);
auto ori_input_data = reinterpret_cast<int8_t *>(input_tensor->MutableData());
auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->MutableData());
ConvInt8Opt(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_,
reinterpret_cast<int32_t *>(bias_data_), output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_,
matmul_func_, support_optimize_);
ConvInt8(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_),
output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_, matmul_func_, support_optimize_);
return RET_OK;
}
@@ -229,7 +228,7 @@ int ConvolutionInt8Impl(void *cdata, int task_id) {
}
int ConvolutionInt8CPUKernel::Run() {
auto ret = InitTmpBufferOpt();
auto ret = InitTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init tmp buffer failed.";
return RET_ERROR;

View File

@@ -51,8 +51,8 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel {
int Run() override;
int RunImpl(int task_id);
void CheckSupportOptimize();
int InitWeightBiasOpt();
int InitTmpBufferOpt();
int InitWeightBias();
int InitTmpBuffer();
private:
void FreeTmpBuffer() {

View File

@@ -149,12 +149,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() {
int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
InitSlideParam();
ConvolutionBaseCPUKernel::Init();
auto ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
return ret;
}
return RET_OK;
}
@@ -179,6 +173,11 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
}
auto ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
return ret;
}
// pack input, assume input format: NHWC -> NHWC4
auto input_tensor = in_tensors_.at(kInputIndex);
@@ -191,7 +190,7 @@
packed_output_ = output_addr;
}
auto ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]";
return RET_ERROR;
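This last hunk is the fix the title names: InitBuffer() moves out of ReSize() and into Run(), and the later ParallelLaunch call becomes a plain assignment because ret is now declared by the InitBuffer() call above it. Assuming the usual MindSpore Lite kernel pattern, where per-run scratch buffers come from the allocator and are released at the end of Run(), allocating them once in ReSize() would leave subsequent runs pointing at freed memory. A minimal self-contained model of the corrected lifecycle (all names illustrative, not the real classes):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified model of the fixed kernel: scratch buffers are created inside
// Run() and released before it returns, so repeated runs never reuse a
// buffer that an earlier Run() already freed.
class DeconvDwInt8Sketch {
 public:
  int Run() {
    int ret = InitBuffer();  // the fix: moved here from ReSize()
    if (ret != 0) {
      std::cerr << "Deconv Depthwise int8 InitBuffer error!" << std::endl;
      return ret;
    }
    ret = ParallelLaunchStub();  // plain assignment; ret is already declared
    FreeBuffer();
    return ret;
  }

 private:
  int InitBuffer() {
    packed_input_.assign(1024, 0);  // hypothetical per-run scratch size
    return 0;
  }
  int ParallelLaunchStub() { return 0; }  // stands in for the thread-pool call
  void FreeBuffer() { packed_input_.clear(); }

  std::vector<int16_t> packed_input_;
};

int main() {
  DeconvDwInt8Sketch kernel;
  int first = kernel.Run();
  int second = kernel.Run();  // safe: each Run() rebuilds its own buffers
  return (first == 0 && second == 0) ? 0 : 1;
}
```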