!3130 [dump] support dumping a quant network to a binary format file

Merge pull request !3130 from vlne-v1/quant_op_depthwise
mindspore-ci-bot 2020-07-17 14:25:57 +08:00 committed by Gitee
commit 6dd99ee35d
10 changed files with 90 additions and 88 deletions

View File

@@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
MS_LOG(INFO) << "End save compiled func graph!";
}
bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const {
std::string phase_prefix = GetPhasePrefix(phase_s);
if (use_vm && phase_prefix == "export") {
MS_LOG(INFO) << "Use ge backend to export geir";
use_vm = false;
}
return use_vm;
}
void ExecutorPy::GetGeBackendPolicy() const {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
@@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const {
}
}
bool IsPhaseExportGeir(const std::string &phase_s) {
auto phase_to_export = "export.geir";
return phase_s.rfind(phase_to_export, 0) != std::string::npos;
}
std::vector<ActionItem> GetPipeline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) {
bool is_geir = IsPhaseExportGeir(phase_s);
std::string backend = MsContext::GetInstance()->backend_policy();
#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
if (mindspore::parallel::ps::Util::IsParamServerMode()) {
mindspore::parallel::ps::Util::SetInternalEnvVar();
}
if (parallel::ps::Util::IsRoleOfPServer()) {
resource->results()[kBackend] = compile::CreateBackend();
return PServerPipeline();
}
if (parallel::ps::Util::IsRoleOfScheduler()) {
return PSchedulerPipeline();
}
#endif
if (use_vm && backend != "ge" && !is_geir) {
// Create backend and session
auto backend_ptr = compile::CreateBackend();
// Connect session to debugger
backend_ptr->SetDebugger();
resource->results()[kBackend] = backend_ptr;
return VmPipeline();
}
return GePipeline();
}
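For context, this refactor pulls the pipeline choice out of `CompileInner` into `IsPhaseExportGeir` and `GetPipeline`. A minimal Python sketch of the dispatch, assuming illustrative string return values in place of the `std::vector<ActionItem>` lists and omitting the parameter-server branches:

```python
def is_phase_export_geir(phase: str) -> bool:
    # The C++ `phase_s.rfind("export.geir", 0) != npos` only matches at
    # index 0, so it is a plain starts-with check.
    return phase.startswith("export.geir")


def get_pipeline(phase: str, use_vm: bool, backend: str) -> str:
    # Illustrative return values stand in for std::vector<ActionItem>.
    if use_vm and backend != "ge" and not is_phase_export_geir(phase):
        return "vm_pipeline"  # VM backend, also the debugger-attached path
    return "ge_pipeline"
```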
bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
MS_LOG(DEBUG) << "Start ExecutorPy compile!";
if ((!py::isinstance<py::str>(phase))) {
@@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
std::string phase_s = py::cast<std::string>(phase);
MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!";
ResourcePtr resource = std::make_shared<Resource>(obj);
std::vector<ActionItem> p_actions;
use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s);
std::string backend = MsContext::GetInstance()->backend_policy();
#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
if (mindspore::parallel::ps::Util::IsParamServerMode()) {
mindspore::parallel::ps::Util::SetInternalEnvVar();
}
if (parallel::ps::Util::IsRoleOfPServer()) {
resource->results()[kBackend] = compile::CreateBackend();
p_actions = PServerPipeline();
} else if (parallel::ps::Util::IsRoleOfScheduler()) {
p_actions = PSchedulerPipeline();
} else if (use_vm && backend != "ge") {
// Create backend and session
auto backend_ptr = compile::CreateBackend();
// Connect session to debugger
backend_ptr->SetDebugger();
resource->results()[kBackend] = backend_ptr;
p_actions = VmPipeline();
} else {
p_actions = GePipeline();
}
#else
if (use_vm && backend != "ge") {
// Create backend and session
auto backend_ptr = compile::CreateBackend();
// Connect session to debugger
backend_ptr->SetDebugger();
resource->results()[kBackend] = backend_ptr;
p_actions = VmPipeline();
} else {
p_actions = GePipeline();
}
#endif
auto p_actions = GetPipeline(resource, phase_s, use_vm);
std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));
// get the parameter items and add the values to args_spec
@@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
}
std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
// phase does not contain 'export_onnx'
if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
// Filter out actions after the 'validate' stage when the phase starts with 'export'.
if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) {
return actions;
}
MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";

View File

@@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
private:
ExecutorPy();
void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
void GetGeBackendPolicy() const;
// filter some pipeline actions according to phase, e.g. when exporting onnx, there is no need to execute actions after
// the 'validate' stage

View File

@@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss";
const char kNameCTCLoss[] = "CTCLoss";
const char kNameRange[] = "Range";
const char kNameSquareSumAll[] = "SquareSumAll";
const char kNameAscendQuant[] = "AscendQuant";
const char kNameAscendDequant[] = "AscendDequant";
const char kNameAscendQuant[] = "Quant";
const char kNameAscendDequant[] = "Dequant";
const char kNameCase[] = "Case";
// -----------------OpAdapter initialization--------------

View File

@@ -1107,7 +1107,7 @@ class QuantBlock(Cell):
r"""
A quant block of Conv/Dense, activation layer for Ascend deploy.
Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant.
Calculate Conv or Dense in Int8, with Quant and DeQuant.
Notes:
This block is only for deployment, and is not trainable.

View File

@@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer):
return x_dtype
class AscendQuant(PrimitiveWithInfer):
class Quant(PrimitiveWithInfer):
r"""
Returns the quantized value of input_x.
@@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer):
Examples:
>>> input_x = Tensor([100.0, 150.0], mstype.float32)
>>> quant = P.AscendQuant(80.0, 0.0, False, "Round")
>>> quant = P.Quant(80.0, 0.0, False, "Round")
>>> y = quant(input_x)
"""
@@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer):
return mstype.int8
class AscendDequant(PrimitiveWithInfer):
class Dequant(PrimitiveWithInfer):
r"""
Returns the dequantized value of input_x.
This operation will apply ReLU to the dequantized value if `relu_flag` is True.
@@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer):
Examples:
>>> input_x = Tensor([100.0, 150.0], mstype.float32)
>>> dequant = P.AscendDequant(False, False)
>>> dequant = P.Dequant(False, False)
>>> y = dequant(input_x)
"""
@prim_attr_register
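For readers following the `AscendQuant`/`AscendDequant` to `Quant`/`Dequant` rename, here is a NumPy sketch of the affine int8 round trip the two ops describe. The exact formula, saturation bounds, and `sqrt_mode` handling are assumptions inferred from the docstring examples and the exporter change below, not taken from the kernels:

```python
import numpy as np

def quant(x, scale, offset=0.0, round_mode="Round"):
    # Assumed affine quantization: q = round(x * scale + offset),
    # saturated to the int8 range (consistent with the docstring example,
    # where Quant(80.0, ...) maps 100.0 to the clamp value 127).
    rounders = {"Round": np.round, "Floor": np.floor,
                "Ceil": np.ceil, "Trunc": np.trunc}
    q = rounders[round_mode](np.asarray(x) * scale + offset)
    return np.clip(q, -128, 127).astype(np.int8)

def dequant(q, scale_deq, sqrt_mode=False, relu_flag=False):
    # When sqrt_mode is set, the stored scale is applied twice (see the
    # exporter change below, which square-roots tiny scales).
    s = scale_deq * scale_deq if sqrt_mode else scale_deq
    y = np.asarray(q, dtype=np.float32) * s
    return np.maximum(y, 0.0) if relu_flag else y
```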

View File

@@ -329,14 +329,14 @@ class ExportToQuantInferNetwork:
return None
# Build the `Quant` and `Dequant` ops.
# AscendQuant only support perlayer version. Need check here.
quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in))
# Quant only supports the per-layer version. Need to check here.
quant_op = inner.Quant(float(scale_a_in), float(zp_a_in))
sqrt_mode = False
scale_deq = scale_a_out * scale_w
if (scale_deq < 2 ** -14).all():
scale_deq = np.sqrt(scale_deq)
sqrt_mode = True
dequant_op = inner.AscendDequant(sqrt_mode)
dequant_op = inner.Dequant(sqrt_mode)
# get op
op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv
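The square-root trick above: when the combined dequant scale `scale_a_out * scale_w` drops below `2 ** -14`, the exporter stores its square root and sets `sqrt_mode` so the op applies the scale twice at runtime, presumably to avoid precision loss in a small hardware scale field. A self-contained sketch of that decision:

```python
import numpy as np

def choose_dequant_scale(scale_a_out, scale_w):
    scale_deq = np.asarray(scale_a_out) * np.asarray(scale_w)
    sqrt_mode = False
    # Mirror of the code above: if every channel scale is below 2**-14,
    # store sqrt(scale) and flag the Dequant op to apply it twice.
    if (scale_deq < 2 ** -14).all():
        scale_deq = np.sqrt(scale_deq)
        sqrt_mode = True
    return scale_deq, sqrt_mode
```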
@@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
file_name (str): File name of model to export.
mean (int, float): Input data mean. Default: 127.5.
std_dev (int, float): Input data standard deviation. Default: 127.5.
file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model.
- GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model.
file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' formats for the exported
quantization aware model. Default: 'GEIR'.
- GEIR: Graph Engine Intermediate Representation. An intermediate representation format of
Ascend models.
- BINARY: Binary format for the model. An intermediate representation format for models.
"""
supported_device = ["Ascend"]
supported_formats = ['GEIR']
supported_formats = ['GEIR', 'BINARY']
mean = validator.check_type("mean", mean, (int, float))
std_dev = validator.check_type("std_dev", std_dev, (int, float))
@@ -428,10 +432,9 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='
network.set_train(False)
if file_format == 'GEIR':
exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
deploy_net = exporter.run()
serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
deploy_net = exporter.run()
serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
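With the `if file_format == 'GEIR'` guard removed, every supported format now runs through `ExportToQuantInferNetwork` before `serialization.export`. A usage sketch mirroring the unit test at the end of this diff, with the new `'BINARY'` format passed explicitly (the `mobilenetV2` constructor comes from the test's model zoo and is a placeholder here):

```python
import numpy as np
from mindspore import Tensor

network = mobilenetV2(num_classes=1000)  # placeholder model from the UT below
network = convert_quant_network(network, bn_fold=True,
                                per_channel=[False, True], symmetric=[True, False])
img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
export(network, img, file_name="quant.pb", file_format='BINARY')
```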
def convert_quant_network(network,

View File

@@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point):
raise ValueError("`scale` and `zero_point` should have the same shape.")
if scale.shape[0] < 1:
raise ValueError("`scale` and `zero_point` shape should be greater than zero.")
if len(scale.shape) > 1:
if len(scale.shape) >= 1 and scale.shape[0] > 1:
# for perchannel
if scale.shape[0] == data.shape[0]:
# `Conv2d` or `Dense` op weight
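The relaxed condition treats any scale vector with more than one entry as per-channel. A sketch of the broadcast this enables; the final `round(w / scale) + zero_point` mapping is an assumption, since only the shape handling is visible in this hunk:

```python
import numpy as np

def weight2int_sketch(data, scale, zero_point):
    # Per-channel: one scale/zero_point per output channel, broadcast
    # along axis 0 of the weight. Mirrors the new check: a vector scale
    # with more than one entry.
    if scale.ndim >= 1 and scale.shape[0] > 1:
        if scale.shape[0] != data.shape[0]:
            raise ValueError("per-channel scale must match data.shape[0]")
        bshape = (-1,) + (1,) * (data.ndim - 1)
        scale = scale.reshape(bshape)
        zero_point = zero_point.reshape(bshape)
    # Assumed affine mapping; not shown in this hunk.
    return np.round(data / scale) + zero_point
```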

View File

@@ -454,19 +454,20 @@ def export(net, *inputs, file_name, file_format='GEIR'):
# export model
net.init_parameters_data()
if file_format == 'GEIR':
_executor.compile(net, *inputs, phase='export')
phase_name = 'export.geir'
_executor.compile(net, *inputs, phase=phase_name)
_executor.export(net, file_name, file_format)
elif file_format == 'ONNX': # file_format is 'ONNX'
# NOTICE: the phase name `export.onnx` is used to judge whether ONNX is being exported in the compile pipeline,
# do not change it to other values.
phase_name = 'export_onnx'
phase_name = 'export.onnx'
graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
onnx_stream = _executor._get_func_graph_proto(graph_id)
with open(file_name, 'wb') as f:
os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
f.write(onnx_stream)
elif file_format == 'BINARY': # file_format is 'BINARY'
phase_name = 'export_binary'
phase_name = 'export.binary'
graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir')
with open(file_name, 'wb') as f:
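Taken together with the pipeline changes, each export format now compiles under a dotted `export.*` phase, which is exactly what the new prefix checks in `FilterActions` and `IsPhaseExportGeir` key on. An illustrative summary:

```python
# Phase names per export format after this change.
PHASE_BY_FORMAT = {
    'GEIR': 'export.geir',      # was plain 'export'
    'ONNX': 'export.onnx',      # was 'export_onnx'
    'BINARY': 'export.binary',  # was 'export_binary'
}
```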

View File

@@ -2180,36 +2180,36 @@ test_case_other_ops = [
]
test_case_quant_ops = [
('AscendQuant_1', {
'block': inner.AscendQuant(0.5, 0.0, False, "Round"),
('Quant_1', {
'block': inner.Quant(0.5, 0.0, False, "Round"),
'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)],
'skip': ['backward']}),
('AscendQuant_2', {
'block': inner.AscendQuant(80.0, 10.0, True, "Round"),
('Quant_2', {
'block': inner.Quant(80.0, 10.0, True, "Round"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
'skip': ['backward']}),
('AscendQuant_3', {
'block': inner.AscendQuant(80.0, 0.0, False, "Floor"),
('Quant_3', {
'block': inner.Quant(80.0, 0.0, False, "Floor"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
'skip': ['backward']}),
('AscendQuant_4', {
'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"),
('Quant_4', {
'block': inner.Quant(80.0, 0.0, False, "Ceil"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
'skip': ['backward']}),
('AscendQuant_5', {
'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"),
('Quant_5', {
'block': inner.Quant(80.0, 0.0, False, "Trunc"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
'skip': ['backward']}),
('AscendQuant_6', {
'block': inner.AscendQuant(-80.0, 10.0, False, "Round"),
('Quant_6', {
'block': inner.Quant(-80.0, 10.0, False, "Round"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
'skip': ['backward']}),
('AscendQuant_7', {
'block': inner.AscendQuant(80.0, -10.0, False, "Round"),
('Quant_7', {
'block': inner.Quant(80.0, -10.0, False, "Round"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
'skip': ['backward']}),
('AscendQuant_8', {
'block': inner.AscendQuant(80.0, 10.0, False, "Round"),
('Quant_8', {
'block': inner.Quant(80.0, 10.0, False, "Round"),
'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)],
'skip': ['backward']}),
]

View File

@@ -75,10 +75,20 @@ def test_qat_lenet():
@pytest.mark.skip(reason="no `te.lang.cce` in ut env")
def test_qat_mobile():
def test_qat_mobile_per_channel_tf():
network = mobilenetV2(num_classes=1000)
img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False])
# should load the checkpoint. mock here
for param in network.get_parameters():
param.init_data()
qat.export(network, img, file_name="quant.pb")
@pytest.mark.skip(reason="no `te.lang.cce` in ut env")
def test_qat_mobile_per_channel_ff():
network = mobilenetV2(num_classes=1000)
img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False])
# should load the checkpoint. mock here
for param in network.get_parameters():
param.init_data()