From 28d1d3708508e180f3b04b798139ba6ffcfa7d85 Mon Sep 17 00:00:00 2001
From: Wei Luning
Date: Thu, 16 Jul 2020 19:37:57 +0800
Subject: [PATCH] End compilation at the 'validate' stage when exporting.

---
 mindspore/ccsrc/pipeline/jit/pipeline.cc      | 85 ++++++++-----------
 mindspore/ccsrc/pipeline/jit/pipeline.h       |  1 -
 mindspore/ccsrc/transform/graph_ir/convert.cc |  4 +-
 mindspore/nn/layer/quant.py                   |  2 +-
 mindspore/ops/operations/_inner_ops.py        |  8 +-
 mindspore/train/quant/quant.py                | 23 ++---
 mindspore/train/quant/quant_utils.py          |  2 +-
 mindspore/train/serialization.py              |  7 +-
 tests/ut/python/ops/test_ops.py               | 32 +++----
 tests/ut/python/train/quant/test_quant.py     | 14 ++-
 10 files changed, 90 insertions(+), 88 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc
index 49bebfb3c42..21d20c893fd 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
   MS_LOG(INFO) << "End save compiled func graph!";
 }
 
-bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const {
-  std::string phase_prefix = GetPhasePrefix(phase_s);
-
-  if (use_vm && phase_prefix == "export") {
-    MS_LOG(INFO) << "Use ge backend to export geir";
-    use_vm = false;
-  }
-  return use_vm;
-}
-
 void ExecutorPy::GetGeBackendPolicy() const {
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
@@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const {
   }
 }
 
+bool IsPhaseExportGeir(const std::string &phase_s) {
+  auto phase_to_export = "export.geir";
+  return phase_s.rfind(phase_to_export, 0) != std::string::npos;
+}
+
+std::vector<ActionItem> GetPipeline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) {
+  bool is_geir = IsPhaseExportGeir(phase_s);
+
+  std::string backend = MsContext::GetInstance()->backend_policy();
+
+#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
+  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
+    mindspore::parallel::ps::Util::SetInternalEnvVar();
+  }
+  if (parallel::ps::Util::IsRoleOfPServer()) {
+    resource->results()[kBackend] = compile::CreateBackend();
+    return PServerPipeline();
+  }
+  if (parallel::ps::Util::IsRoleOfScheduler()) {
+    return PSchedulerPipeline();
+  }
+#endif
+
+  if (use_vm && backend != "ge" && !is_geir) {
+    // Create backend and session
+    auto backend_ptr = compile::CreateBackend();
+    // Connect session to debugger
+    backend_ptr->SetDebugger();
+    resource->results()[kBackend] = backend_ptr;
+    return VmPipeline();
+  }
+  return GePipeline();
+}
+
 bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
   MS_LOG(DEBUG) << "Start ExecutorPy compile!";
   if ((!py::isinstance<py::str>(phase))) {
@@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
   std::string phase_s = py::cast<std::string>(phase);
   MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!";
   ResourcePtr resource = std::make_shared<Resource>(obj);
-  std::vector<ActionItem> p_actions;
-
-  use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s);
-
-  std::string backend = MsContext::GetInstance()->backend_policy();
-#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
-  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
-    mindspore::parallel::ps::Util::SetInternalEnvVar();
-  }
-  if (parallel::ps::Util::IsRoleOfPServer()) {
-    resource->results()[kBackend] = compile::CreateBackend();
-    p_actions = PServerPipeline();
-  } else if (parallel::ps::Util::IsRoleOfScheduler()) {
-    p_actions = PSchedulerPipeline();
-  } else if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#else
-  if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#endif
+  auto p_actions = GetPipeline(resource, phase_s, use_vm);
   std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));
 
   // get the parameter items and add the values to args_spec
@@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
 }
 
 std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
-  // phase does not contain 'export_onnx'
-  if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
+  // When the phase starts with 'export', filter out the actions after the 'validate' stage.
+  if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) {
     return actions;
   }
   MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h
index 705853d0860..d018d736231 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.h
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.h
@@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  private:
   ExecutorPy();
   void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
-  bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, there is no need to execute actions
   // after the 'validate' stage
diff --git a/mindspore/ccsrc/transform/graph_ir/convert.cc b/mindspore/ccsrc/transform/graph_ir/convert.cc
index 56028bbdd90..132fabe561b 100644
--- a/mindspore/ccsrc/transform/graph_ir/convert.cc
+++ b/mindspore/ccsrc/transform/graph_ir/convert.cc
@@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss";
 const char kNameCTCLoss[] = "CTCLoss";
 const char kNameRange[] = "Range";
 const char kNameSquareSumAll[] = "SquareSumAll";
-const char kNameAscendQuant[] = "AscendQuant";
-const char kNameAscendDequant[] = "AscendDequant";
+const char kNameAscendQuant[] = "Quant";
+const char kNameAscendDequant[] = "Dequant";
 const char kNameCase[] = "Case";
 
 // -----------------OpAdapter initialization--------------
diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py
index 2f4f2032904..dc30d33ac18 100644
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -1107,7 +1107,7 @@ class QuantBlock(Cell):
     r"""
     A quant block of Conv/Dense, activation layer for Ascend deploy.
 
-    Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant.
+    Calculate Conv or Dense in Int8, with Quant and Dequant.
 
     Notes:
         This block is only for deploy, and not trainable.
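Note: the compile phases introduced by this patch follow an `export.<format>` naming
convention, and `IsPhaseExportGeir`/`FilterActions` above match on its prefixes. A
minimal Python sketch of that convention, assuming only what this patch introduces
(the `PHASE_BY_FORMAT` table and `phase_for` helper are illustrative, not MindSpore APIs):

    # Sketch: how export formats map to compile phases after this patch.
    PHASE_BY_FORMAT = {
        'GEIR': 'export.geir',      # IsPhaseExportGeir() matches this, forcing the GE pipeline
        'ONNX': 'export.onnx',      # compiled with do_convert=False (see serialization.py below)
        'BINARY': 'export.binary',  # compiled with do_convert=False
    }

    def phase_for(file_format):
        """Return the compile phase used when exporting `file_format`."""
        if file_format not in PHASE_BY_FORMAT:
            raise ValueError("unsupported export format: " + file_format)
        return PHASE_BY_FORMAT[file_format]

    # Every export phase shares the 'export' prefix that FilterActions keys off,
    # so compilation ends after the 'validate' action for all of them.
    assert all(p.startswith('export') for p in PHASE_BY_FORMAT.values())
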
diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py
index 3c5e34e25e5..014998b4bee 100644
--- a/mindspore/ops/operations/_inner_ops.py
+++ b/mindspore/ops/operations/_inner_ops.py
@@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer):
         return x_dtype
 
 
-class AscendQuant(PrimitiveWithInfer):
+class Quant(PrimitiveWithInfer):
     r"""
     Returns the quantized value of input_x.
 
@@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer):
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> quant = P.AscendQuant(80.0, 0.0, False, "Round")
+        >>> quant = P.Quant(80.0, 0.0, False, "Round")
         >>> y = quant(input_x)
     """
 
@@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer):
         return mstype.int8
 
 
-class AscendDequant(PrimitiveWithInfer):
+class Dequant(PrimitiveWithInfer):
     r"""
     Returns the dequantized value of input_x.
     This operation applies ReLU to the dequantized value if `relu_flag` is True.
 
@@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer):
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> dequant = P.AscendDequant(False, False)
+        >>> dequant = P.Dequant(False, False)
         >>> y = dequant(input_x)
     """
     @prim_attr_register
diff --git a/mindspore/train/quant/quant.py b/mindspore/train/quant/quant.py
index b553373f105..4048525029e 100644
--- a/mindspore/train/quant/quant.py
+++ b/mindspore/train/quant/quant.py
@@ -329,14 +329,14 @@ class ExportToQuantInferNetwork:
             return None
 
         # Build the `Quant` `Dequant` op.
-        # AscendQuant only support perlayer version. Need check here.
-        quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in))
+        # `Quant` only supports the per-layer version. A check is needed here.
+        quant_op = inner.Quant(float(scale_a_in), float(zp_a_in))
         sqrt_mode = False
         scale_deq = scale_a_out * scale_w
         if (scale_deq < 2 ** -14).all():
             scale_deq = np.sqrt(scale_deq)
             sqrt_mode = True
-        dequant_op = inner.AscendDequant(sqrt_mode)
+        dequant_op = inner.Dequant(sqrt_mode)
 
         # get op
         op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv
@@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
         file_name (str): File name of model to export.
         mean (int): Input data mean. Default: 127.5.
         std_dev (int, float): Input data variance. Default: 127.5.
-        file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model.
-
-            - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model.
+        file_format (str): MindSpore currently supports 'GEIR' and 'BINARY' formats for the exported
+            quantization aware model. Default: 'GEIR'.
+
+            - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of
+              Ascend models.
+            - BINARY: Binary format for model. An intermediate representation format for models.
""" supported_device = ["Ascend"] - supported_formats = ['GEIR'] + supported_formats = ['GEIR', 'BINARY'] mean = validator.check_type("mean", mean, (int, float)) std_dev = validator.check_type("std_dev", std_dev, (int, float)) @@ -428,10 +432,9 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format=' network.set_train(False) - if file_format == 'GEIR': - exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) - deploy_net = exporter.run() - serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) + exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs) + deploy_net = exporter.run() + serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format) def convert_quant_network(network, diff --git a/mindspore/train/quant/quant_utils.py b/mindspore/train/quant/quant_utils.py index 69505970fd8..5d524391bef 100644 --- a/mindspore/train/quant/quant_utils.py +++ b/mindspore/train/quant/quant_utils.py @@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point): raise ValueError("`scale` and `zero_point` should have the same shape.") if scale.shape[0] < 0: raise ValueError("`scale` and `zero_point` shape should greater than zero.") - if len(scale.shape) > 1: + if len(scale.shape) >= 1 and scale.shape[0] > 1: # for perchannel if scale.shape[0] == data.shape[0]: # `Conv2d` or `Dense` op weight diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index bd1cdab43d3..9d77a920b66 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -451,19 +451,20 @@ def export(net, *inputs, file_name, file_format='GEIR'): # export model net.init_parameters_data() if file_format == 'GEIR': - _executor.compile(net, *inputs, phase='export') + phase_name = 'export.geir' + _executor.compile(net, *inputs, phase=phase_name) _executor.export(net, file_name, file_format) elif file_format == 'ONNX': # file_format is 'ONNX' # NOTICE: the pahse name `export_onnx` is used for judging whether is exporting onnx in the compile pipeline, # do not change it to other values. 
-        phase_name = 'export_onnx'
+        # NOTICE: the phase name prefix `export` makes the compile pipeline stop after the 'validate' stage,
+        # do not change it to other values.
+        phase_name = 'export.onnx'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id)
         with open(file_name, 'wb') as f:
             os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
             f.write(onnx_stream)
     elif file_format == 'BINARY':  # file_format is 'BINARY'
-        phase_name = 'export_binary'
+        phase_name = 'export.binary'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir')
         with open(file_name, 'wb') as f:
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index f25196eef75..ef5b0953362 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -2180,36 +2180,36 @@ test_case_other_ops = [
 ]
 
 test_case_quant_ops = [
-    ('AscendQuant_1', {
-        'block': inner.AscendQuant(0.5, 0.0, False, "Round"),
+    ('Quant_1', {
+        'block': inner.Quant(0.5, 0.0, False, "Round"),
         'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_2', {
-        'block': inner.AscendQuant(80.0, 10.0, True, "Round"),
+    ('Quant_2', {
+        'block': inner.Quant(80.0, 10.0, True, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_3', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Floor"),
+    ('Quant_3', {
+        'block': inner.Quant(80.0, 0.0, False, "Floor"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_4', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"),
+    ('Quant_4', {
+        'block': inner.Quant(80.0, 0.0, False, "Ceil"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_5', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"),
+    ('Quant_5', {
+        'block': inner.Quant(80.0, 0.0, False, "Trunc"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_6', {
-        'block': inner.AscendQuant(-80.0, 10.0, False, "Round"),
+    ('Quant_6', {
+        'block': inner.Quant(-80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_7', {
-        'block': inner.AscendQuant(80.0, -10.0, False, "Round"),
+    ('Quant_7', {
+        'block': inner.Quant(80.0, -10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_8', {
-        'block': inner.AscendQuant(80.0, 10.0, False, "Round"),
+    ('Quant_8', {
+        'block': inner.Quant(80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)],
         'skip': ['backward']}),
 ]
diff --git a/tests/ut/python/train/quant/test_quant.py b/tests/ut/python/train/quant/test_quant.py
index 39e887170ca..4816af89360 100644
--- a/tests/ut/python/train/quant/test_quant.py
+++ b/tests/ut/python/train/quant/test_quant.py
@@ -75,10 +75,20 @@
 @pytest.mark.skip(reason="no `te.lang.cce` in ut env")
-def test_qat_mobile():
+def test_qat_mobile_per_channel_tf():
     network = mobilenetV2(num_classes=1000)
     img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
-    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False])
+    # should load the checkpoint; mocked here for the UT
+    for param in network.get_parameters():
+        param.init_data()
+    qat.export(network, img, file_name="quant.pb")
+
+@pytest.mark.skip(reason="no `te.lang.cce` in ut env")
+def test_qat_mobile_per_channel_ff():
+    network = mobilenetV2(num_classes=1000)
+    img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False])
     # should load the checkpoint. mock here
     for param in network.get_parameters():
         param.init_data()
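
Note: a hedged usage sketch of the quantization-aware export path that the tests above
exercise. The `mobilenetv2` import path is an assumption borrowed from the UT environment,
and `init_data()` stands in for loading a real checkpoint, exactly as in the tests:

    import numpy as np
    from mindspore import Tensor
    from mindspore.train.quant import quant as qat
    # Illustrative import; the UT resolves mobilenetV2 from its local model zoo helpers.
    from mobilenetv2 import mobilenetV2

    network = mobilenetV2(num_classes=1000)
    img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
    network = qat.convert_quant_network(network, bn_fold=True,
                                        per_channel=[False, True],
                                        symmetric=[True, False])
    # A real flow would load a trained checkpoint; init_data() mocks it, as in the UT.
    for param in network.get_parameters():
        param.init_data()
    # After this patch, file_format may be 'GEIR' (the default) or 'BINARY'.
    qat.export(network, img, file_name="quant.pb", file_format='GEIR')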