forked from mindspore-Ecosystem/mindspore
!3130 [dump]support dump quant in binary format file
Merge pull request !3130 from vlne-v1/quant_op_depthwise
commit 6dd99ee35d
@@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
   MS_LOG(INFO) << "End save compiled func graph!";
 }
 
-bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const {
-  std::string phase_prefix = GetPhasePrefix(phase_s);
-
-  if (use_vm && phase_prefix == "export") {
-    MS_LOG(INFO) << "Use ge backend to export geir";
-    use_vm = false;
-  }
-  return use_vm;
-}
-
 void ExecutorPy::GetGeBackendPolicy() const {
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
@@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const {
   }
 }
 
+bool IsPhaseExportGeir(const std::string &phase_s) {
+  auto phase_to_export = "export.geir";
+  return phase_s.rfind(phase_to_export, 0) != std::string::npos;
+}
+
+std::vector<ActionItem> GetPipline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) {
+  bool is_geir = IsPhaseExportGeir(phase_s);
+
+  std::string backend = MsContext::GetInstance()->backend_policy();
+
+#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
+  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
+    mindspore::parallel::ps::Util::SetInternalEnvVar();
+  }
+  if (parallel::ps::Util::IsRoleOfPServer()) {
+    resource->results()[kBackend] = compile::CreateBackend();
+    return PServerPipeline();
+  }
+  if (parallel::ps::Util::IsRoleOfScheduler()) {
+    return PSchedulerPipeline();
+  }
+#endif
+
+  if (use_vm && backend != "ge" && !is_geir) {
+    // Create backend and session
+    auto backend_ptr = compile::CreateBackend();
+    // Connect session to debugger
+    backend_ptr->SetDebugger();
+    resource->results()[kBackend] = backend_ptr;
+    return VmPipeline();
+  }
+  return GePipeline();
+}
+
 bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
   MS_LOG(DEBUG) << "Start ExecutorPy compile!";
   if ((!py::isinstance<py::str>(phase))) {
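The new GetPipline helper routes on the phase string instead of a mutable flag: any phase that starts with "export.geir" is sent to the GE pipeline even when the VM backend was requested, which replaces the old ChangeExportGeirUseVmFlag flip. A minimal Python sketch of the same prefix test (the function name below is hypothetical, for illustration only):

def is_phase_export_geir(phase_s):
    # rfind(prefix, 0) != npos in the C++ above is a starts-with test
    return phase_s.startswith("export.geir")

assert is_phase_export_geir("export.geir")   # forced onto the GE pipeline
assert not is_phase_export_geir("train")     # eligible for the VM pipeline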
@@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
   std::string phase_s = py::cast<std::string>(phase);
   MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!";
   ResourcePtr resource = std::make_shared<Resource>(obj);
-  std::vector<ActionItem> p_actions;
-
-  use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s);
-
-  std::string backend = MsContext::GetInstance()->backend_policy();
-#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
-  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
-    mindspore::parallel::ps::Util::SetInternalEnvVar();
-  }
-  if (parallel::ps::Util::IsRoleOfPServer()) {
-    resource->results()[kBackend] = compile::CreateBackend();
-    p_actions = PServerPipeline();
-  } else if (parallel::ps::Util::IsRoleOfScheduler()) {
-    p_actions = PSchedulerPipeline();
-  } else if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#else
-  if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#endif
-
+
+  auto p_actions = GetPipline(resource, phase_s, use_vm);
   std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));
 
   // get the parameters items and add the value to args_spec
@@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
 }
 
 std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
-  // phase does not contain 'export_onnx'
-  if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
+  // filter action after validate when 'export'.
+  if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) {
     return actions;
   }
   MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";
@@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  private:
   ExecutorPy();
   void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
-  bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
   // 'validate' stage
@@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss";
 const char kNameCTCLoss[] = "CTCLoss";
 const char kNameRange[] = "Range";
 const char kNameSquareSumAll[] = "SquareSumAll";
-const char kNameAscendQuant[] = "AscendQuant";
-const char kNameAscendDequant[] = "AscendDequant";
+const char kNameAscendQuant[] = "Quant";
+const char kNameAscendDequant[] = "Dequant";
 const char kNameCase[] = "Case";
 
 // -----------------OpAdapter initialization--------------
@@ -1107,7 +1107,7 @@ class QuantBlock(Cell):
     r"""
     A quant block of Conv/Dense, activation layer for Ascend deploy.
 
-    Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant.
+    Calculate Conv or Dense in Int8, with Quant and DeQuant.
 
     Notes:
         This block is only for deploy, and not trainable.
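For context, the deploy-time dataflow this docstring describes is quantize, integer core op, dequantize. A rough numpy sketch under assumed scales; the helper below is an illustrative stand-in, not the real Quant/DeQuant ops:

import numpy as np

def quant_block_sketch(x, weight_int8, act_scale, deq_scale):
    # Quant: scale activations into the saturated int8 range (formula assumed)
    x_q = np.clip(np.round(x * act_scale), -128, 127)
    # Dense core accumulates in the integer domain
    acc = x_q @ weight_int8.T
    return acc * deq_scale  # DeQuant: rescale the accumulator back to float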
@@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer):
         return x_dtype
 
 
-class AscendQuant(PrimitiveWithInfer):
+class Quant(PrimitiveWithInfer):
     r"""
     Returns the quantized value of input_x.
 
@@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer):
 
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> quant = P.AscendQuant(80.0, 0.0, False, "Round")
+        >>> quant = P.Quant(80.0, 0.0, False, "Round")
         >>> y = quant(input_x)
     """
 
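Alongside the renamed example, a hedged numpy sketch of the quantization arithmetic the docstring implies, assuming the formula round(x * scale + offset) saturated to int8; the exact formula is defined by the operator itself:

import numpy as np

def quant_sketch(x, scale, offset):
    q = np.round(x * scale + offset)              # "Round" mode assumed
    return np.clip(q, -128, 127).astype(np.int8)

print(quant_sketch(np.array([100.0, 150.0]), 80.0, 0.0))  # both saturate to 127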
@@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer):
         return mstype.int8
 
 
-class AscendDequant(PrimitiveWithInfer):
+class Dequant(PrimitiveWithInfer):
     r"""
     Returns the dequantized value of input_x.
     This operation will do ReLU to the dequantized value if `relu_flag` is True.
@@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer):
 
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> dequant = P.AscendDequant(False, False)
+        >>> dequant = P.Dequant(False, False)
         >>> y = dequant(input_x)
     """
     @prim_attr_register
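A matching sketch for the Dequant side, assuming the scale is applied multiplicatively and using the optional ReLU the docstring documents; the parameter names here are illustrative:

import numpy as np

def dequant_sketch(q, scale, relu_flag=False):
    y = q.astype(np.float32) * scale
    return np.maximum(y, 0.0) if relu_flag else y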
@@ -329,14 +329,14 @@ class ExportToQuantInferNetwork:
             return None
 
         # Build the `Quant` `Dequant` op.
-        # AscendQuant only support perlayer version. Need check here.
-        quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in))
+        # Quant only support perlayer version. Need check here.
+        quant_op = inner.Quant(float(scale_a_in), float(zp_a_in))
         sqrt_mode = False
         scale_deq = scale_a_out * scale_w
         if (scale_deq < 2 ** -14).all():
             scale_deq = np.sqrt(scale_deq)
             sqrt_mode = True
-        dequant_op = inner.AscendDequant(sqrt_mode)
+        dequant_op = inner.Dequant(sqrt_mode)
 
         # get op
         op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv
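The sqrt_mode branch above guards against the dequant scale underflowing the 2 ** -14 threshold: the exporter stores sqrt(scale_deq) and the Dequant op applies it twice. A small numpy check of that round trip, assuming square-and-apply semantics:

import numpy as np

scale_deq = np.array([2.0 ** -20])             # below the 2 ** -14 threshold
sqrt_mode = bool((scale_deq < 2 ** -14).all())
stored = np.sqrt(scale_deq) if sqrt_mode else scale_deq
applied = stored * stored if sqrt_mode else stored
assert np.allclose(applied, scale_deq)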
@@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
         file_name (str): File name of model to export.
         mean (int): Input data mean. Default: 127.5.
         std_dev (int, float): Input data variance. Default: 127.5.
-        file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model.
-            - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model.
+        file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported
+            quantization aware model. Default: 'GEIR'.
+
+            - GEIR: Graph Engine Intermidiate Representation. An intermidiate representation format of
+              Ascend model.
+            - BINARY: Binary format for model. An intermidiate representation format for models.
     """
     supported_device = ["Ascend"]
-    supported_formats = ['GEIR']
+    supported_formats = ['GEIR', 'BINARY']
 
     mean = validator.check_type("mean", mean, (int, float))
     std_dev = validator.check_type("std_dev", std_dev, (int, float))
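A usage sketch for the widened file_format argument, based on the signature above; `network` and `qat` are placeholders for a quantization-aware-trained Cell and the quant module alias used in the tests at the end of this diff:

import numpy as np
from mindspore import Tensor

# `network` is assumed to come from qat.convert_quant_network(...)
img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
qat.export(network, img, file_name="quant.pb", file_format='BINARY')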
@@ -428,10 +432,9 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
 
     network.set_train(False)
 
-    if file_format == 'GEIR':
-        exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
-        deploy_net = exporter.run()
-        serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
+    exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
+    deploy_net = exporter.run()
+    serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
 
 
 def convert_quant_network(network,
@@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point):
         raise ValueError("`scale` and `zero_point` should have the same shape.")
     if scale.shape[0] < 0:
         raise ValueError("`scale` and `zero_point` shape should greater than zero.")
-    if len(scale.shape) > 1:
+    if len(scale.shape) >= 1 and scale.shape[0] > 1:
         # for perchannel
         if scale.shape[0] == data.shape[0]:
             # `Conv2d` or `Dense` op weight
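The corrected condition distinguishes per-layer from per-channel quantization by the shape of `scale`: a per-layer scale collapses to a single entry, while a per-channel scale is 1-D with one entry per channel, which the old len(scale.shape) > 1 test missed. A minimal check of the new predicate:

import numpy as np

def is_per_channel(scale):
    return len(scale.shape) >= 1 and scale.shape[0] > 1

assert not is_per_channel(np.array([0.1]))         # per-layer: single scale
assert is_per_channel(np.array([0.1, 0.2, 0.3]))   # per-channel: one per channel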
@@ -454,19 +454,20 @@ def export(net, *inputs, file_name, file_format='GEIR'):
     # export model
     net.init_parameters_data()
     if file_format == 'GEIR':
-        _executor.compile(net, *inputs, phase='export')
+        phase_name = 'export.geir'
+        _executor.compile(net, *inputs, phase=phase_name)
         _executor.export(net, file_name, file_format)
     elif file_format == 'ONNX':  # file_format is 'ONNX'
         # NOTICE: the pahse name `export_onnx` is used for judging whether is exporting onnx in the compile pipeline,
         # do not change it to other values.
-        phase_name = 'export_onnx'
+        phase_name = 'export.onnx'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id)
         with open(file_name, 'wb') as f:
             os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
             f.write(onnx_stream)
     elif file_format == 'BINARY':  # file_format is 'BINARY'
-        phase_name = 'export_binary'
+        phase_name = 'export.binary'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir')
         with open(file_name, 'wb') as f:
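With the new 'BINARY' branch, callers reach the binary dump through the same entry point; a usage sketch against the signature above, where `net` and `input_tensor` are placeholders for a Cell and a sample input:

# module path assumed from the quant exporter's serialization.export(...) call above
from mindspore.train import serialization

serialization.export(net, input_tensor, file_name="model.dat", file_format='BINARY')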
@@ -2180,36 +2180,36 @@ test_case_other_ops = [
 ]
 
 test_case_quant_ops = [
-    ('AscendQuant_1', {
-        'block': inner.AscendQuant(0.5, 0.0, False, "Round"),
+    ('Quant_1', {
+        'block': inner.Quant(0.5, 0.0, False, "Round"),
         'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_2', {
-        'block': inner.AscendQuant(80.0, 10.0, True, "Round"),
+    ('Quant_2', {
+        'block': inner.Quant(80.0, 10.0, True, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_3', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Floor"),
+    ('Quant_3', {
+        'block': inner.Quant(80.0, 0.0, False, "Floor"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_4', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"),
+    ('Quant_4', {
+        'block': inner.Quant(80.0, 0.0, False, "Ceil"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_5', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"),
+    ('Quant_5', {
+        'block': inner.Quant(80.0, 0.0, False, "Trunc"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_6', {
-        'block': inner.AscendQuant(-80.0, 10.0, False, "Round"),
+    ('Quant_6', {
+        'block': inner.Quant(-80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_7', {
-        'block': inner.AscendQuant(80.0, -10.0, False, "Round"),
+    ('Quant_7', {
+        'block': inner.Quant(80.0, -10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_8', {
-        'block': inner.AscendQuant(80.0, 10.0, False, "Round"),
+    ('Quant_8', {
+        'block': inner.Quant(80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)],
         'skip': ['backward']}),
 ]
@@ -75,10 +75,20 @@ def test_qat_lenet():
 
 
 @pytest.mark.skip(reason="no `te.lang.cce` in ut env")
-def test_qat_mobile():
+def test_qat_mobile_per_channel_tf():
     network = mobilenetV2(num_classes=1000)
     img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
-    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False])
     # should load the checkpoint. mock here
     for param in network.get_parameters():
         param.init_data()
+    qat.export(network, img, file_name="quant.pb")
+
+@pytest.mark.skip(reason="no `te.lang.cce` in ut env")
+def test_qat_mobile_per_channel_ff():
+    network = mobilenetV2(num_classes=1000)
+    img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False])
+    # should load the checkpoint. mock here
+    for param in network.get_parameters():
+        param.init_data()