From fc2f78a40365b815cf5bb5525150abc476cb687b Mon Sep 17 00:00:00 2001 From: zhang__sss Date: Fri, 9 Jul 2021 14:42:23 +0800 Subject: [PATCH] code clean --- mindspore/compression/common/__init__.py | 5 +- mindspore/compression/export/quant_export.py | 279 +++++++++++------- mindspore/compression/quant/__init__.py | 11 +- mindspore/compression/quant/quant_utils.py | 116 ++------ .../lite/tools/anf_exporter/anf_exporter.cc | 8 +- .../ascend310_infer/src/ModelProcess.cpp | 2 +- .../ascend310_infer/src/SampleProcess.cpp | 4 +- .../ascend310_infer/src/ModelProcess.cpp | 2 +- .../ascend310_infer/src/SampleProcess.cpp | 4 +- .../ascend310_infer/src/ModelProcess.cpp | 2 +- .../ascend310_infer/src/SampleProcess.cpp | 4 +- .../ascend310_infer/src/ModelProcess.cpp | 2 +- .../ascend310_infer/src/SampleProcess.cpp | 6 +- .../official/nlp/q8bert/scripts/run_eval.sh | 2 +- .../q8bert/scripts/run_standalone_train.sh | 2 +- 15 files changed, 234 insertions(+), 215 deletions(-) diff --git a/mindspore/compression/common/__init__.py b/mindspore/compression/common/__init__.py index fb83535cf65..148bdbbf865 100644 --- a/mindspore/compression/common/__init__.py +++ b/mindspore/compression/common/__init__.py @@ -16,7 +16,4 @@ Compression common module. """ -from .constant import * - -__all__ = [] -__all__.extend(constant.__all__) +from .constant import QuantDtype diff --git a/mindspore/compression/export/quant_export.py b/mindspore/compression/export/quant_export.py index 10f6e20089f..aca23103941 100644 --- a/mindspore/compression/export/quant_export.py +++ b/mindspore/compression/export/quant_export.py @@ -35,6 +35,7 @@ from ..quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell __all__ = ["ExportToQuantInferNetwork"] + class QuantBlock(Cell): r""" A quant block of Conv/Dense, activation layer for Ascend deploy. @@ -180,6 +181,7 @@ class QuantMindirBlock(Cell): s += f', activation={self.activation}' return s + class ExportToQuantInferNetwork: """ Convert quantization aware network to infer network. @@ -199,18 +201,13 @@ class ExportToQuantInferNetwork: def __init__(self, network, mean, std_dev, *inputs, is_mindir=False): network = Validator.check_isinstance('network', network, (nn.Cell,)) - self.input_scale = 1 / std_dev - self.input_zero_point = round(mean) self.data_type = mstype.int8 self.network = copy.deepcopy(network) - self.network_bk = copy.deepcopy(network) - self.all_parameters = {p.name: p for p in self.network.get_parameters()} self.get_inputs_table(inputs) self.mean = mean self.std_dev = std_dev self.is_mindir = is_mindir self.upcell = None - self.upname = None def get_inputs_table(self, inputs): """Get the input quantization parameters of quantization cell for quant export.""" @@ -220,6 +217,7 @@ class ExportToQuantInferNetwork: def run(self): """Start to convert.""" + self.network_bk = copy.deepcopy(self.network) self.network.update_cell_prefix() network = self.network if isinstance(network, _AddFakeQuantInput): @@ -229,7 +227,36 @@ class ExportToQuantInferNetwork: def _get_quant_block(self, cell_core, activation, fake_quant_a_out): """convert network's quant subcell to deploy subcell""" - # Calculate the scale and zero point + scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out) + + # Build the `Quant` `Dequant` op. + # Quant only support perlayer version. Need check here. 
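The `Quant`/`Dequant` pair assembled just below wraps the integer core op at deploy time. As a rough numpy sketch of the arithmetic the two ops perform (per-layer int8 assumed, since `inner.Quant` here only supports the per-layer form; the helper names `quant_sketch`/`dequant_sketch` are illustrative, not part of this patch):

```python
import numpy as np

def quant_sketch(x, scale_a_in, zp_a_in):
    """Roughly what inner.Quant(1 / scale_a_in, zp_a_in) does to activations:
    scale onto the int8 grid, shift by the zero point, round and saturate."""
    q = np.round(x / scale_a_in + zp_a_in)
    return np.clip(q, -128, 127).astype(np.int8)

def dequant_sketch(acc_int32, scale_deq):
    """Roughly what inner.Dequant does to the int32 accumulator of the core op:
    one multiply by the fused dequantization scale per output channel."""
    return acc_int32.astype(np.float32) * scale_deq

print(quant_sketch(np.array([0.5, -0.3]), scale_a_in=0.01, zp_a_in=0.0))  # [50, -30]
```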
+ quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in)) + scale_deq = self.__get_dequant_scale(scale_a_in, scale_w) + dequant_op = inner.Dequant() + + if isinstance(activation, _AddFakeQuantAfterSubCell): + activation = activation.subcell + elif hasattr(activation, "get_origin"): + activation = activation.get_origin() + + # get op + if isinstance(cell_core, quant.DenseQuant): + op_core = P.MatMul() + else: + op_core = cell_core.conv + + # get the `weight` and `bias` + weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w) + + if self.is_mindir: + block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict) + else: + block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) + return block + + def __get_quant_param(self, cell_core, fake_quant_a_out): + """get parameter for quant block""" w_minq_name = cell_core.fake_quant_weight.minq.name w_maxq_name = cell_core.fake_quant_weight.maxq.name np_type = mstype.dtype_to_nptype(self.data_type) @@ -262,7 +289,7 @@ class ExportToQuantInferNetwork: _, minq_name = info if minq_name == 'input': scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ - self.input_scale, self.input_zero_point, 'None', 'None' + (1 / self.std_dev), round(self.mean), 'None', 'None' else: fake_quant_a_in_prefix = minq_name[:-5] cells = self.network_bk.cells_and_names() @@ -270,26 +297,34 @@ class ExportToQuantInferNetwork: if cell[0].endswith(fake_quant_a_in_prefix): fake_quant_a_in = cell[1] break - scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type) param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range else: # skip quant layer scale_a_in, zp_a_in = 1.0, 0.0 + return scale_a_in, zp_a_in, scale_w, zp_w, param_dict - # Build the `Quant` `Dequant` op. - # Quant only support perlayer version. Need check here. 
- quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in)) + @staticmethod + def __get_dequant_scale(scale_a_in, scale_w): + """Get dequant scale""" scale_deq = scale_a_in * scale_w - dequant_op = inner.Dequant() - if isinstance(activation, _AddFakeQuantAfterSubCell): - activation = activation.subcell - elif hasattr(activation, "get_origin"): - activation = activation.get_origin() + # fuse parameter + # |--------|47:40|--------|39:32|--------|31:0| + # offset_w [8] shift_N [8] deq_scale [32] + float32_deq_scale = scale_deq.astype(np.float32) + uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32) + scale_length = scale_deq.size # channel + dequant_param = np.zeros(scale_length, dtype=np.uint64) + for index in range(scale_length): + dequant_param[index] += uint32_deq_scale[index] + scale_deq = Tensor(dequant_param, mstype.uint64) + return scale_deq - # get the `weight` and `bias` + def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w): + """Get weight and bias for quantizaiton""" + np_type = mstype.dtype_to_nptype(self.data_type) weight = cell_core.weight.data.asnumpy() bias = None if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)): @@ -302,37 +337,22 @@ class ExportToQuantInferNetwork: weight_b = weight bias_b = bias # apply the quant - weight = quant_utils.weight2int(weight, scale_w, zp_w, np_type, cell_core.fake_quant_weight.num_bits, - cell_core.fake_quant_weight.narrow_range) + quant_min, quant_max = quant_utils.get_quant_min_max(np_type, + cell_core.fake_quant_weight.num_bits, + cell_core.fake_quant_weight.narrow_range) + weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max) if bias is not None: bias = Tensor(bias / scale_a_in / scale_w, mstype.int32) - # fuse parameter - # |--------|47:40|--------|39:32|--------|31:0| - # offset_w [8] shift_N [8] deq_scale [32] - float32_deq_scale = scale_deq.astype(np.float32) - uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32) - scale_length = scale_deq.size # channel - dequant_param = np.zeros(scale_length, dtype=np.uint64) - for index in range(scale_length): - dequant_param[index] += uint32_deq_scale[index] - scale_deq = Tensor(dequant_param, mstype.uint64) - # get op if isinstance(cell_core, quant.DenseQuant): - op_core = P.MatMul() weight = np.transpose(weight) weight_b = np.transpose(weight_b) - else: - op_core = cell_core.conv + weight = Tensor(weight, self.data_type) weight_b = Tensor(weight_b) if bias_b is not None: bias_b = Tensor(bias_b, mstype.float32) - if self.is_mindir: - block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict) - else: - block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) - return block + return weight, bias, weight_b, bias_b def _add_output_min_max_for_op(self, origin_op, fake_quant_cell): """add output quant info for quant op for export mindir.""" @@ -343,6 +363,121 @@ class ExportToQuantInferNetwork: origin_op.add_prim_attr('output_maxq', Tensor(maxq)) origin_op.add_prim_attr('output_minq', Tensor(minq)) + def _convert_subcell(self, network, change, name, subcell): + """Convert subcell to ant subcell.""" + if subcell is not None and hasattr(subcell, "fake_quant_weight"): + new_subcell = self._get_quant_block(subcell, None, None) + prefix = subcell.param_prefix + new_subcell.update_parameters_name(prefix + '.') + self.upcell = new_subcell + network.insert_child_to_cell(name, new_subcell) + change = True + return network, change + + def _convert_conv(self, network, change, name, subcell): + 
"""Convert subcell to ant subcell for conv.""" + cell_core = subcell.conv + activation = subcell.activation + fake_quant_act = None + if hasattr(activation, 'fake_quant_act_before'): + fake_quant_act = activation.fake_quant_act_before + elif hasattr(activation, 'fake_quant_act'): + fake_quant_act = activation.fake_quant_act + if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): + new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) + self.upcell = None + prefix = subcell.param_prefix + new_subcell.update_parameters_name(prefix + '.') + network.insert_child_to_cell(name, new_subcell) + change = True + return network, change + + def _convert_dense(self, network, change, name, subcell): + """Convert subcell to ant subcell for dense.""" + cell_core = subcell.dense + activation = subcell.activation + fake_quant_act = None + if hasattr(activation, 'fake_quant_act_before'): + fake_quant_act = activation.fake_quant_act_before + elif hasattr(activation, 'fake_quant_act'): + fake_quant_act = activation.fake_quant_act + if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): + new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) + prefix = subcell.param_prefix + new_subcell.update_parameters_name(prefix + '.') + network.insert_child_to_cell(name, new_subcell) + self.upcell = None + change = True + return network, change + + def _convert_act(self, subcell): + """Convert subcell to ant subcell for activation.""" + activation = subcell.get_origin() + if isinstance(activation, nn.ReLU): + self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act) + elif isinstance(activation, nn.ReLU6): + self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act) + if self.upcell: + self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act) + return activation + + def _convert_add(self, subcell): + """Convert subcell to ant subcell for add.""" + if isinstance(subcell.add, _AddFakeQuantAfterSubCell): + add_op = subcell.add.subcell + subcell.__delattr__("add") + subcell.__setattr__("add", add_op) + add_op = subcell.add + self._add_output_min_max_for_op(add_op, subcell.fake_quant_act) + subcell.__delattr__("fake_quant_act") + subcell.__setattr__("fake_quant_act", P.identity()) + + def _convert_observer(self, network, name, subcell): + """Convert subcell to ant subcell for FakeQuantWithMinMaxObserver.""" + if self.upcell: + self._add_output_min_max_for_op(self.upcell.core_op, subcell) + network.__delattr__(name) + network.__setattr__(name, P.identity()) + + def _convert_fake_quant_after_cell(self, network, name, subcell): + """Convert subcell to ant subcell for _AddFakeQuantAfterSubCell.""" + op = subcell.subcell + self._add_output_min_max_for_op(op, subcell.fake_quant_act) + network.__delattr__(name) + network.__setattr__(name, op) + + def _convert_core_quant_subcell(self, network, change, name, subcell): + """Convert subcell to ant subcell for conv and dense.""" + is_core_subcell = True + if isinstance(subcell, nn.Conv2dBnAct): + network, change = self._convert_conv(network, change, name, subcell) + elif isinstance(subcell, nn.DenseBnAct): + network, change = self._convert_dense(network, change, name, subcell) + elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv, + quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)): + network, change = self._convert_subcell(network, change, name, subcell) + else: + is_core_subcell = False + return is_core_subcell, 
network, change + + def _convert_other_quant_subcell(self, network, change, name, subcell): + """Convert subcell to ant subcell for cell except conv and dense.""" + is_other_subcell = True + if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"): + activation = self._convert_act(subcell) + network.insert_child_to_cell(name, activation) + change = True + elif isinstance(subcell, nn.TensorAddQuant): + self._convert_add(subcell) + elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver): + self._convert_observer(network, name, subcell) + elif isinstance(subcell, _AddFakeQuantAfterSubCell): + self._convert_fake_quant_after_cell(network, name, subcell) + change = True + else: + is_other_subcell = False + return is_other_subcell, network, change + def _convert_quant2deploy(self, network): """Convert network's all quant subcell to deploy subcell.""" cells = network.name_cells() @@ -351,71 +486,11 @@ class ExportToQuantInferNetwork: subcell = cells[name] if subcell == network: continue - if isinstance(subcell, nn.Conv2dBnAct): - network, change = self._convert_subcell(network, change, name, subcell) - elif isinstance(subcell, nn.DenseBnAct): - network, change = self._convert_subcell(network, change, name, subcell, conv=False) - elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv, - quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)): - network, change = self._convert_subcell(network, change, name, subcell, core=False) - elif isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"): - activation = subcell.get_origin() - if isinstance(activation, nn.ReLU): - self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act) - elif isinstance(activation, nn.ReLU6): - self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act) - if self.upcell: - self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act) - network.insert_child_to_cell(name, activation) - change = True - elif isinstance(subcell, nn.TensorAddQuant): - if isinstance(subcell.add, _AddFakeQuantAfterSubCell): - add_op = subcell.add.subcell - subcell.__delattr__("add") - subcell.__setattr__("add", add_op) - add_op = subcell.add - self._add_output_min_max_for_op(add_op, subcell.fake_quant_act) - subcell.__delattr__("fake_quant_act") - subcell.__setattr__("fake_quant_act", P.identity()) - elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver): - if self.upcell: - self._add_output_min_max_for_op(self.upcell.core_op, subcell) - network.__delattr__(name) - network.__setattr__(name, P.identity()) - elif isinstance(subcell, _AddFakeQuantAfterSubCell): - op = subcell.subcell - self._add_output_min_max_for_op(op, subcell.fake_quant_act) - network.__delattr__(name) - network.__setattr__(name, op) - change = True - else: - self.upcell, self.upname = None, None + is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell) + is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell) + if not is_core_quant_subcell and not is_other_quant_subcell: + self.upcell = None self._convert_quant2deploy(subcell) if isinstance(network, nn.SequentialCell) and change: network.cell_list = list(network.cells()) return network - - def _convert_subcell(self, network, change, name, subcell, core=True, conv=True): - """Convert subcell to ant subcell.""" - new_subcell = None - fake_quant_act = None - if core: - cell_core = subcell.conv if conv else subcell.dense - 
activation = subcell.activation - if hasattr(activation, 'fake_quant_act_before'): - fake_quant_act = activation.fake_quant_act_before - elif hasattr(activation, 'fake_quant_act'): - fake_quant_act = activation.fake_quant_act - else: - cell_core = subcell - activation = None - if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): - new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) - if new_subcell: - prefix = subcell.param_prefix - new_subcell.update_parameters_name(prefix + '.') - self.upcell = None if core else new_subcell - self.upname = None if core else name - network.insert_child_to_cell(name, new_subcell) - change = True - return network, change diff --git a/mindspore/compression/quant/__init__.py b/mindspore/compression/quant/__init__.py index 233c50f2603..092a1c48a69 100644 --- a/mindspore/compression/quant/__init__.py +++ b/mindspore/compression/quant/__init__.py @@ -16,11 +16,6 @@ Compression quant module. """ -from .quantizer import * -from .qat import * -from .quant_utils import * - -__all__ = [] -__all__.extend(qat.__all__) -__all__.extend(quantizer.__all__) -__all__.extend(quant_utils.__all__) +from .quantizer import OptimizeOption +from .qat import QuantizationAwareTraining, create_quant_config +from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers diff --git a/mindspore/compression/quant/quant_utils.py b/mindspore/compression/quant/quant_utils.py index 6e0f2bd8d30..21bbd46607f 100644 --- a/mindspore/compression/quant/quant_utils.py +++ b/mindspore/compression/quant/quant_utils.py @@ -23,22 +23,20 @@ __all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"] def cal_quantization_params(input_min, input_max, + quant_min, + quant_max, data_type, - num_bits=8, - symmetric=False, - narrow_range=False, - neg_trunc=False): + symmetric=False): r""" Calculate quantization params for scale and zero point. Args: input_min (numpy.ndarray): The dimension of channel or 1. input_max (numpy.ndarray): The dimension of channel or 1. + quant_min (int): The minimum quantization integer. + quant_max (int): The maximum quantization integer. data_type (numpy type) : Can be numpy int8, numpy uint8. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. - neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False. Returns: scale (numpy.ndarray): quantization param. 
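The hunk below moves the scale/zero-point arithmetic onto the new signature: callers first derive the integer range with `get_quant_min_max`, then pass it in. A small usage sketch with illustrative values, assuming both helpers are imported from the patched module:

```python
import numpy as np
from mindspore.compression.quant.quant_utils import (cal_quantization_params,
                                                     get_quant_min_max)

# Integer range comes from the bit config; float range from observed min/max.
quant_min, quant_max = get_quant_min_max(np.int8, num_bits=8, narrow_range=False)
# -> (-128, 127)
scale, zp = cal_quantization_params(np.array([-1.0]), np.array([1.0]),
                                    quant_min, quant_max,
                                    data_type=np.int8, symmetric=True)
# symmetric int8 keeps zp at 0; scale == (1.0 - (-1.0)) / (127 - (-128)) == 2.0 / 255
```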
@@ -56,6 +54,24 @@ def cal_quantization_params(input_min, if (input_max == input_min).all(): return np.ones(input_min.shape), np.zeros(input_min.shape) + # calculate scale + if symmetric: + input_max = np.maximum(-input_min, input_max) + input_min = -input_max + scale = (input_max - input_min) / (quant_max - quant_min) + + # calculate zero point + if data_type == np.int8 and symmetric: + zp = np.zeros(input_min.shape) + else: + zp_double = quant_min - input_min / scale + zp = np.floor(zp_double + 0.5) + + return scale, zp + + +def get_quant_min_max(data_type, num_bits=8, narrow_range=False): + """Calculate quantization params for minimum/maximum quantization integer""" if data_type == np.int8: quant_min = 0 - 2 ** (num_bits - 1) quant_max = 2 ** (num_bits - 1) - 1 @@ -66,24 +82,10 @@ def cal_quantization_params(input_min, raise ValueError("Unsupported datatype({})".format(data_type)) if narrow_range: quant_min = quant_min + 1 - - # calculate scale - if symmetric and not neg_trunc: - input_max = np.maximum(-input_min, input_max) - input_min = -input_max - scale = (input_max - input_min) / (quant_max - quant_min) - - # calculate zero point - if data_type == np.int8 and symmetric and not neg_trunc: - zp = np.zeros(input_min.shape) - else: - zp_double = quant_min - input_min / scale - zp = np.floor(zp_double + 0.5) - - return scale, zp + return quant_min, quant_max -def weight2int(data, scale, zero_point, data_type, num_bits=8, narrow_range=False): +def weight2int(data, scale, zero_point, quant_min, quant_max): r""" Calculate int8/uint8 weight from fp32. the formula is defined as: @@ -94,9 +96,8 @@ def weight2int(data, scale, zero_point, data_type, num_bits=8, narrow_range=Fals data (numpy.ndarray): The dimension of channel or 1. Should be NCHW. scale (numpy.ndarray): The dimension of channel or 1. zero_point (numpy.ndarray): The dimension of channel or 1. - data_type (numpy type) : Can be numpy int8, numpy uint8. - num_bits (int): Quantization number bit, support 4 and 8bit. Default: 8. - narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. + quant_min (int): The minimum quantization integer. + quant_max (int): The maximum quantization integer. Returns: weight (numpy.ndarray): The dimension of channel or 1. 
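With the range re-derivation removed (next hunk), `weight2int` reduces to scale, shift, round, and saturate. A quick worked check under the new signature, using made-up per-layer numbers:

```python
import numpy as np
from mindspore.compression.quant.quant_utils import weight2int

data = np.array([-1.5, 0.0, 1.0], dtype=np.float32)   # fp32 weights
scale = np.array([0.01], dtype=np.float32)            # per-layer scale
zero_point = np.array([0.0], dtype=np.float32)
w_int = weight2int(data, scale, zero_point, quant_min=-128, quant_max=127)
# round(data / 0.01) -> [-150, 0, 100]; saturation clips -150 to quant_min,
# so w_int == [-128, 0, 100]
```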
@@ -120,17 +121,6 @@ def weight2int(data, scale, zero_point, data_type, num_bits=8, narrow_range=Fals else: raise ValueError("Unsupported weight shape({})".format(data.shape)) - if data_type == np.int8: - quant_min = 0 - 2 ** (num_bits - 1) - quant_max = 2 ** (num_bits - 1) - 1 - elif data_type == np.uint8: - quant_min = 0 - quant_max = 2 ** num_bits - 1 - else: - raise ValueError("Unsupported weight datatype({})".format(data_type)) - if narrow_range: - quant_min = quant_min + 1 - weight_int = np.round((data / scale) + zero_point) weight_int[weight_int > quant_max] = quant_max weight_int[weight_int < quant_min] = quant_min @@ -145,54 +135,12 @@ def scale_zp_max_min_from_fake_quant_cell(cell, data_type): if cell.mode == 'LEARNED_SCALE': maxq = np.abs(maxq) minq = -np.abs(minq) - + quant_min, quant_max = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range) + symmetric = cell.symmetric and not cell.neg_trunc scale, zp = cal_quantization_params( - minq, maxq, data_type, - num_bits=cell.num_bits, - symmetric=cell.symmetric, - narrow_range=cell.narrow_range, - neg_trunc=cell.neg_trunc) - return scale, zp, maxq, minq - - -def scale_zp_from_data(op, minq, maxq, data_type): - r""" - Get calculate quantization params for scale and zero point. - - Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive. - - Args: - op (Primitive): Fake quant primitive `mindspore.ops.operation.FakeQuantPerLayer` or - `mindspore.ops.operation.FakeQuantPerChannel` - minq (Parameter): Parameter `minq` of `mindspore.nn.layer.FakeQuantWithMinMax` - maxq (Parameter): Parameter `maxq` of `mindspore.nn.layer.FakeQuantWithMinMax` - data_type (numpy type): Can be `numpy.int8` or `numpy.uint8`. - - Returns: - scale (numpy.ndarray): quantization param. - zero point (numpy.ndarray): quantization param. 
-    """
-    minq = minq.data.asnumpy()
-    maxq = maxq.data.asnumpy()
-
-    scale, zp = cal_quantization_params(
-        minq, maxq, data_type,
-        num_bits=op.num_bits,
-        symmetric=op.symmetric,
-        narrow_range=op.narrow_range)
-    return scale, zp
-
-
-def scale_zp_max_min_from_data(op, minq, maxq, data_type):
-    """Get calculate quantization params for scale, zero point, max and min."""
-    minq = minq.data.asnumpy()
-    maxq = maxq.data.asnumpy()
-
-    scale, zp = cal_quantization_params(
-        minq, maxq, data_type,
-        num_bits=op.num_bits,
-        symmetric=op.symmetric,
-        narrow_range=op.narrow_range)
+    scale, zp = cal_quantization_params(
+        minq, maxq,
+        quant_min, quant_max, data_type,
+        symmetric=symmetric)
     return scale, zp, maxq, minq
diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc
index a3774f045c8..3d9a236f5bd 100644
--- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc
+++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc
@@ -46,6 +46,8 @@ using mindspore::ops::PrimitiveC;
 namespace mindspore::lite {
 namespace {
+constexpr int BIT_NUM_8 = 8;
+constexpr int BIT_NUM_16 = 16;
 std::list<CNodePtr> GetOrderedCNodes(const FuncGraphPtr fg) {
   auto BelongSameGraph = std::bind(IncludeBelongGraph, fg, std::placeholders::_1);
   auto succ_include_fv = [&fg](const AnfNodePtr &node) -> std::vector<AnfNodePtr> {
@@ -113,14 +115,14 @@ static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_pt
   auto repetition_packed = false;
   MS_LOG(DEBUG) << dst_node->name;
   if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) {
-    if (bit_num <= 8) {
+    if (bit_num <= BIT_NUM_8) {
       repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input);
     } else {
       repetition_packed = PackRepetition<int16_t>(bit_num, tensor_input);
     }
   }
-
-  if (bit_num != 8 && bit_num != 16 && !repetition_packed && dst_node->quantType != schema::QuantType_QUANT_NONE) {
+  if (bit_num != BIT_NUM_8 && bit_num != BIT_NUM_16 && !repetition_packed &&
+      dst_node->quantType != schema::QuantType_QUANT_NONE) {
     auto status = DoBitPack(bit_num, tensor_input);
     if (status != RET_OK) {
       MS_LOG(ERROR) << "do bit pack failed. " << status;
diff --git a/model_zoo/official/cv/lenet_quant/ascend310_infer/src/ModelProcess.cpp b/model_zoo/official/cv/lenet_quant/ascend310_infer/src/ModelProcess.cpp
index 5586ebbd9f9..72744284906 100644
--- a/model_zoo/official/cv/lenet_quant/ascend310_infer/src/ModelProcess.cpp
+++ b/model_zoo/official/cv/lenet_quant/ascend310_infer/src/ModelProcess.cpp
@@ -181,7 +181,7 @@ void ModelProcess::DumpModelOutputResult(char *output_name) {
     std::string fileName = std::string(output_name) + '_' + std::to_string(i) + ".bin";
     std::string outputFileName = homePath + "/" + fileName;
     FILE *outputFile = fopen(outputFileName.c_str(), "wb");
-    if (outputFile) {
+    if (outputFile != nullptr) {
       aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
       void* data = aclGetDataBufferAddr(dataBuffer);
       uint32_t len = aclGetDataBufferSizeV2(dataBuffer);
diff --git a/model_zoo/official/cv/lenet_quant/ascend310_infer/src/SampleProcess.cpp b/model_zoo/official/cv/lenet_quant/ascend310_infer/src/SampleProcess.cpp
index 658e24251df..889e696bb2a 100644
--- a/model_zoo/official/cv/lenet_quant/ascend310_infer/src/SampleProcess.cpp
+++ b/model_zoo/official/cv/lenet_quant/ascend310_infer/src/SampleProcess.cpp
@@ -82,8 +82,8 @@ Result SampleProcess::InitResource(const char *aclConfigPath) {
 }
 
 void SampleProcess::GetAllFiles(std::string path, std::vector<std::string> *files) {
-  DIR *pDir;
-  struct dirent* ptr;
+  DIR *pDir = nullptr;
+  struct dirent* ptr = nullptr;
   if (!(pDir = opendir(path.c_str())))
     return;
   while ((ptr = readdir(pDir)) != 0) {
diff --git a/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/ModelProcess.cpp b/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/ModelProcess.cpp
index 5586ebbd9f9..72744284906 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/ModelProcess.cpp
+++ b/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/ModelProcess.cpp
@@ -181,7 +181,7 @@ void ModelProcess::DumpModelOutputResult(char *output_name) {
     std::string fileName = std::string(output_name) + '_' + std::to_string(i) + ".bin";
     std::string outputFileName = homePath + "/" + fileName;
     FILE *outputFile = fopen(outputFileName.c_str(), "wb");
-    if (outputFile) {
+    if (outputFile != nullptr) {
       aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
       void* data = aclGetDataBufferAddr(dataBuffer);
       uint32_t len = aclGetDataBufferSizeV2(dataBuffer);
diff --git a/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/SampleProcess.cpp b/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/SampleProcess.cpp
index 057f97f8624..6870f9d8e43 100644
--- a/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/SampleProcess.cpp
+++ b/model_zoo/official/cv/mobilenetv2_quant/ascend310_infer/src/SampleProcess.cpp
@@ -82,8 +82,8 @@ Result SampleProcess::InitResource(const char *aclConfigPath) {
 }
 
 void SampleProcess::GetAllFiles(std::string path, std::vector<std::string> *files) {
-  DIR *pDir;
-  struct dirent* ptr;
+  DIR *pDir = nullptr;
+  struct dirent* ptr = nullptr;
   if (!(pDir = opendir(path.c_str())))
     return;
   while ((ptr = readdir(pDir)) != 0) {
diff --git a/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/ModelProcess.cpp b/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/ModelProcess.cpp
index 5586ebbd9f9..72744284906 100644
--- a/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/ModelProcess.cpp
+++ b/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/ModelProcess.cpp
@@ -181,7 +181,7 @@ void ModelProcess::DumpModelOutputResult(char *output_name) {
     std::string fileName = std::string(output_name) + '_' + std::to_string(i) + ".bin";
     std::string outputFileName = homePath + "/" + fileName;
     FILE *outputFile = fopen(outputFileName.c_str(), "wb");
-    if (outputFile) {
+    if (outputFile != nullptr) {
       aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
       void* data = aclGetDataBufferAddr(dataBuffer);
       uint32_t len = aclGetDataBufferSizeV2(dataBuffer);
diff --git a/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/SampleProcess.cpp b/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/SampleProcess.cpp
index 057f97f8624..6870f9d8e43 100644
--- a/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/SampleProcess.cpp
+++ b/model_zoo/official/cv/resnet50_quant/ascend310_infer/src/SampleProcess.cpp
@@ -82,8 +82,8 @@ Result SampleProcess::InitResource(const char *aclConfigPath) {
 }
 
 void SampleProcess::GetAllFiles(std::string path, std::vector<std::string> *files) {
-  DIR *pDir;
-  struct dirent* ptr;
+  DIR *pDir = nullptr;
+  struct dirent* ptr = nullptr;
   if (!(pDir = opendir(path.c_str())))
     return;
   while ((ptr = readdir(pDir)) != 0) {
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/ModelProcess.cpp b/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/ModelProcess.cpp
index 76cb1fa2594..339bdcd09f6 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/ModelProcess.cpp
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/ModelProcess.cpp
@@ -181,7 +181,7 @@ void ModelProcess::DumpModelOutputResult(char *output_name) {
     std::string fileName = std::string(output_name) + '_' + std::to_string(i) + ".bin";
     std::string outputFileName = homePath + "/" + fileName;
     FILE *outputFile = fopen(outputFileName.c_str(), "wb");
-    if (outputFile) {
+    if (outputFile != nullptr) {
       aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i);
       void* data = aclGetDataBufferAddr(dataBuffer);
       uint32_t len = aclGetDataBufferSizeV2(dataBuffer);
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/SampleProcess.cpp b/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/SampleProcess.cpp
index ed35f79626e..fb8a1813492 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/SampleProcess.cpp
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/ascend310_infer/src/SampleProcess.cpp
@@ -82,8 +82,8 @@ Result SampleProcess::InitResource(const char *aclConfigPath) {
 }
 
 void SampleProcess::GetAllFiles(std::string path, std::vector<std::string> *files) {
-  DIR *pDir;
-  struct dirent* ptr;
+  DIR *pDir = nullptr;
+  struct dirent* ptr = nullptr;
   if (!(pDir = opendir(path.c_str())))
     return;
   while ((ptr = readdir(pDir)) != 0) {
@@ -127,11 +127,13 @@ Result SampleProcess::Process(const char *om_path, const char *input_folder) {
     void *inputShapeBuffer = nullptr;
     int mret = aclrtMalloc(&inputShapeBuffer, 8, ACL_MEM_MALLOC_NORMAL_ONLY);
     if (mret != ACL_ERROR_NONE) {
+      aclrtFree(inputShape);
       aclrtFree(inputShapeBuffer);
       return FAILED;
     }
     mret = aclrtMemcpy(reinterpret_cast(inputShapeBuffer), 8, inputShape, 8, ACL_MEMCPY_HOST_TO_DEVICE);
     if (mret != ACL_ERROR_NONE) {
+      aclrtFree(inputShape);
       aclrtFree(inputShapeBuffer);
       return FAILED;
     }
diff --git a/model_zoo/official/nlp/q8bert/scripts/run_eval.sh b/model_zoo/official/nlp/q8bert/scripts/run_eval.sh
index 67797f98a0e..9bef1f54fd2 100644
--- a/model_zoo/official/nlp/q8bert/scripts/run_eval.sh
+++ b/model_zoo/official/nlp/q8bert/scripts/run_eval.sh
@@ -32,7 +32,7 @@ eval_data_dir=$3
 load_ckpt_path=$4
 
 mkdir -p ms_log
-PROJECT_DIR=$(cd "$(dirname "$0")"; pwd)
+PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
 CUR_DIR=`pwd`
 export GLOG_log_dir=${CUR_DIR}/ms_log
 export GLOG_logtostderr=0
diff --git a/model_zoo/official/nlp/q8bert/scripts/run_standalone_train.sh b/model_zoo/official/nlp/q8bert/scripts/run_standalone_train.sh
index 1cee0f79369..bc78509fcb0 100644
--- a/model_zoo/official/nlp/q8bert/scripts/run_standalone_train.sh
+++ b/model_zoo/official/nlp/q8bert/scripts/run_standalone_train.sh
@@ -33,7 +33,7 @@ eval_data_dir=$4
 load_ckpt_path=$5
 
 mkdir -p ms_log
-PROJECT_DIR=$(cd "$(dirname "$0")"; pwd)
+PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
 CUR_DIR=`pwd`
 export GLOG_log_dir=${CUR_DIR}/ms_log
 export GLOG_logtostderr=0
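One non-obvious piece of the quant_export refactor above is `__get_dequant_scale`: the per-channel fp32 dequantization scale is bit-reinterpreted as uint32 and packed into the low 32 bits of a uint64 word, with the offset_w and shift_N fields left at zero. A standalone numpy sketch of that fusion (values illustrative):

```python
import numpy as np

# Fused dequant parameter layout, per the comment in __get_dequant_scale:
# |--------|47:40|--------|39:32|--------|31:0|
#  offset_w [8]   shift_N [8]   deq_scale [32]
scale_deq = np.array([0.0125, 0.05], dtype=np.float32)       # per-channel scales
uint32_bits = np.frombuffer(scale_deq.tobytes(), dtype=np.uint32)
packed = np.zeros(scale_deq.size, dtype=np.uint64)
packed += uint32_bits   # bits 31:0 carry the raw fp32 bits of each scale
# offset_w (bits 47:40) and shift_N (bits 39:32) stay zero, matching the patch,
# which only fills the deq_scale field before wrapping it in a uint64 Tensor.
```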