!7686 [MS][LITE][CPU] fix deconv depthwise int8 bug
Merge pull request !7686 from fuzhiye/tmp
commit c0f3baf842
@@ -62,9 +62,9 @@ void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, in
 #endif
 }
 
-void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
-                 const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
-                 ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
+void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
+              const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
+              ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
   int kernel_h = conv_param->kernel_h_;
   int kernel_w = conv_param->kernel_w_;
   int in_batch = conv_param->input_batch_;
@@ -32,9 +32,9 @@
 extern "C" {
 #endif
 // int8 conv common
-void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
-                 const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
-                 ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize);
+void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
+              const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
+              ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize);
 
 // int8 convolution 1x1
 void Conv1x1PreOptPeroc(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, size_t input_channel,
@@ -141,34 +141,6 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
   return RET_OK;
 }
 
-int ConvolutionBaseCPUKernel::SetIfAsymmetric() {
-  uint8_t asymmetric = 0b0;
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  auto filter_ele_num = filter_tensor->ElementsNum();
-  auto filter_data = reinterpret_cast<int8_t *>(filter_tensor->MutableData());
-  int min_value = INT8_MAX;
-  int max_value = INT8_MIN;
-  for (int i = 0; i < filter_ele_num; ++i) {
-    min_value = min_value < filter_data[i] ? min_value : filter_data[i];
-    max_value = max_value > filter_data[i] ? max_value : filter_data[i];
-  }
-  if (conv_quant_arg_->filter_arg_num_ == kPerTensor) {
-    auto filter_zp = conv_quant_arg_->filter_quant_args_[0].zp_;
-    if (filter_zp != 0 && min_value >= -128 && max_value <= 127) {
-      asymmetric = asymmetric | FILTER_ASYMMETRIC;
-    }
-  } else {
-    auto filter_arg = conv_quant_arg_->filter_quant_args_;
-    for (int i = 0; i < conv_param_->output_channel_; ++i) {
-      if (filter_arg[i].zp_ != 0 && min_value >= -128 && max_value <= 127) {
-        asymmetric = asymmetric | FILTER_ASYMMETRIC;
-      }
-    }
-  }
-  conv_quant_arg_->asymmetric_ = asymmetric;
-  return RET_OK;
-}
-
 int ConvolutionBaseCPUKernel::MallocQuantParam() {
   conv_quant_arg_ = &conv_param_->conv_quant_arg_;
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -48,7 +48,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override { return 0; }
   int Run() override { return 0; }
-  int SetIfAsymmetric();
   int SetIfPerChannel();
   int MallocQuantParam();
   int SetQuantParam();
@@ -156,14 +156,7 @@ int Convolution3x3Int8CPUKernel::InitTmpBuffer() {
   return RET_OK;
 }
 
-void Convolution3x3Int8CPUKernel::ConfigInputOutput() {
-  auto output_tensor = out_tensors_.at(kOutputIndex);
-  output_tensor->SetFormat(schema::Format::Format_NHWC);
-}
-
 int Convolution3x3Int8CPUKernel::Init() {
-  // config input output
-  ConfigInputOutput();
   auto ret = SetQuantParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set quant param failed.";
@@ -38,7 +38,6 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
   int RunImpl(int task_id);
   int InitWeightBias();
   int InitTmpBuffer();
-  void ConfigInputOutput();
 
  private:
   void FreeTmpBuffer();
@@ -59,7 +59,7 @@ void ConvolutionInt8CPUKernel::CheckSupportOptimize() {
   conv_param_->tile_num_ = tile_num_;
 }
 
-int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
+int ConvolutionInt8CPUKernel::InitWeightBias() {
   auto filter_tensor = in_tensors_.at(kWeightIndex);
   auto input_channel = filter_tensor->Channel();
   auto output_channel = filter_tensor->Batch();
@@ -83,7 +83,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
   }
 #endif
   int pack_weight_size = up_round_oc * up_round_deep;
-  int bias_size = up_round_oc * sizeof(int32_t);
+  size_t bias_size = up_round_oc * sizeof(int32_t);
   int32_t input_zp = conv_param_->conv_quant_arg_.input_quant_args_[0].zp_;
 
   // init weight
@@ -150,7 +150,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
   return RET_OK;
 }
 
-int ConvolutionInt8CPUKernel::InitTmpBufferOpt() {
+int ConvolutionInt8CPUKernel::InitTmpBuffer() {
   MS_ASSERT(ctx_->allocator != nullptr);
   int kernel_plane = conv_param_->kernel_h_ * conv_param_->kernel_w_;
   int tmp_size;
@@ -181,7 +181,7 @@ int ConvolutionInt8CPUKernel::Init() {
     return ret;
   }
 
-  ret = InitWeightBiasOpt();
+  ret = InitWeightBias();
  if (ret != RET_OK) {
     MS_LOG(ERROR) << "Initialization for optimized int8 conv failed.";
     return RET_ERROR;
@@ -212,9 +212,8 @@ int ConvolutionInt8CPUKernel::RunImpl(int task_id) {
   auto input_tensor = in_tensors_.at(kInputIndex);
   auto ori_input_data = reinterpret_cast<int8_t *>(input_tensor->MutableData());
   auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->MutableData());
-  ConvInt8Opt(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_,
-              reinterpret_cast<int32_t *>(bias_data_), output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_,
-              matmul_func_, support_optimize_);
+  ConvInt8(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_),
+           output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_, matmul_func_, support_optimize_);
   return RET_OK;
 }
 
@@ -229,7 +228,7 @@ int ConvolutionInt8Impl(void *cdata, int task_id) {
 }
 
 int ConvolutionInt8CPUKernel::Run() {
-  auto ret = InitTmpBufferOpt();
+  auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
     return RET_ERROR;
@@ -51,8 +51,8 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
   int RunImpl(int task_id);
   void CheckSupportOptimize();
-  int InitWeightBiasOpt();
-  int InitTmpBufferOpt();
+  int InitWeightBias();
+  int InitTmpBuffer();
 
  private:
   void FreeTmpBuffer() {
@@ -149,12 +149,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() {
 int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
   InitSlideParam();
   ConvolutionBaseCPUKernel::Init();
-
-  auto ret = InitBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
-    return ret;
-  }
   return RET_OK;
 }
 
@@ -179,6 +173,11 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
   }
+  auto ret = InitBuffer();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
+    return ret;
+  }
 
   // pack input, assume input format: NHWC -> NHWC4
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -191,7 +190,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
     packed_output_ = output_addr;
   }
 
-  auto ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]";
     return RET_ERROR;
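Note on the depthwise-deconvolution hunks above: the patch moves InitBuffer() out of ReSize() and into Run(), and the existing `auto ret = ParallelLaunch(...)` line becomes `ret = ParallelLaunch(...)` because `ret` is now declared earlier by the InitBuffer() call. A plausible reading (not stated in the commit message) is that buffers sized at ReSize() time can be based on shapes that are not yet final, so allocating them immediately before execution is safer. The standalone sketch below illustrates that lifetime pattern only; it is not MindSpore Lite code, and all names in it (ToyDeconvDwKernel, InitBuffer, packed_input_, ...) are hypothetical.

// Minimal sketch of "allocate work buffers in Run(), not ReSize()".
// Assumes nothing about the real kernel beyond the pattern shown in the diff.
#include <cstdint>
#include <cstdio>
#include <vector>

class ToyDeconvDwKernel {
 public:
  // ReSize() only records the new shape; it no longer touches the work buffers.
  int ReSize(int in_h, int in_w, int channel) {
    in_h_ = in_h;
    in_w_ = in_w;
    channel_ = channel;
    return 0;
  }

  int Run() {
    // (Re)allocate the packed buffer right before execution, when the shape is
    // guaranteed to be final -- mirroring the move of InitBuffer() into Run().
    if (InitBuffer() != 0) {
      std::fprintf(stderr, "InitBuffer failed\n");
      return -1;
    }
    // ... pack input (NHWC -> NHWC4), launch per-thread compute, unpack output ...
    return 0;
  }

 private:
  int InitBuffer() {
    const int c4 = (channel_ + 3) / 4 * 4;  // channel rounded up to a multiple of 4
    packed_input_.assign(static_cast<size_t>(in_h_) * in_w_ * c4, 0);
    return packed_input_.empty() ? -1 : 0;
  }

  int in_h_ = 0, in_w_ = 0, channel_ = 0;
  std::vector<int8_t> packed_input_;
};

int main() {
  ToyDeconvDwKernel kernel;
  kernel.ReSize(4, 4, 3);   // shape may still change after this point
  kernel.ReSize(8, 8, 16);  // e.g. a later resize with the real input shape
  return kernel.Run();      // buffers are sized from the final shape only here
}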