forked from mindspore-Ecosystem/mindspore
!3130 [dump]support dump quant in binary format file
Merge pull request !3130 from vlne-v1/quant_op_depthwise
Commit 6dd99ee35d
@@ -383,16 +383,6 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
   MS_LOG(INFO) << "End save compiled func graph!";
 }
 
-bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const {
-  std::string phase_prefix = GetPhasePrefix(phase_s);
-
-  if (use_vm && phase_prefix == "export") {
-    MS_LOG(INFO) << "Use ge backend to export geir";
-    use_vm = false;
-  }
-  return use_vm;
-}
-
 void ExecutorPy::GetGeBackendPolicy() const {
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
@@ -402,6 +392,40 @@ void ExecutorPy::GetGeBackendPolicy() const {
   }
 }
 
+bool IsPhaseExportGeir(const std::string &phase_s) {
+  auto phase_to_export = "export.geir";
+  return phase_s.rfind(phase_to_export, 0) != std::string::npos;
+}
+
+std::vector<ActionItem> GetPipline(const ResourcePtr &resource, const std::string &phase_s, bool use_vm) {
+  bool is_geir = IsPhaseExportGeir(phase_s);
+
+  std::string backend = MsContext::GetInstance()->backend_policy();
+
+#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
+  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
+    mindspore::parallel::ps::Util::SetInternalEnvVar();
+  }
+  if (parallel::ps::Util::IsRoleOfPServer()) {
+    resource->results()[kBackend] = compile::CreateBackend();
+    return PServerPipeline();
+  }
+  if (parallel::ps::Util::IsRoleOfScheduler()) {
+    return PSchedulerPipeline();
+  }
+#endif
+
+  if (use_vm && backend != "ge" && !is_geir) {
+    // Create backend and session
+    auto backend_ptr = compile::CreateBackend();
+    // Connect session to debugger
+    backend_ptr->SetDebugger();
+    resource->results()[kBackend] = backend_ptr;
+    return VmPipeline();
+  }
+  return GePipeline();
+}
+
 bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
   MS_LOG(DEBUG) << "Start ExecutorPy compile!";
   if ((!py::isinstance<py::str>(phase))) {
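Note: `phase_s.rfind(phase_to_export, 0)` pins the search position to 0, so `IsPhaseExportGeir` is effectively a "starts with" test. A minimal Python sketch of the same check (the helper name and sample phases are illustrative, not part of the patch):

# Equivalent of the C++ prefix test: rfind(sub, 0) can only match at index 0.
def is_phase_export_geir(phase_s: str) -> bool:
    return phase_s.startswith("export.geir")

assert is_phase_export_geir("export.geir.some_model")
assert not is_phase_export_geir("train.some_model")
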
@@ -420,43 +444,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
   std::string phase_s = py::cast<std::string>(phase);
   MS_LOG(INFO) << "ExecutorPy compile phase:" << phase_s << "!";
   ResourcePtr resource = std::make_shared<Resource>(obj);
-  std::vector<ActionItem> p_actions;
-
-  use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s);
-
-  std::string backend = MsContext::GetInstance()->backend_policy();
-#if (!_WIN32 && !ENABLE_GE && !ENABLE_TESTCASES)
-  if (mindspore::parallel::ps::Util::IsParamServerMode()) {
-    mindspore::parallel::ps::Util::SetInternalEnvVar();
-  }
-  if (parallel::ps::Util::IsRoleOfPServer()) {
-    resource->results()[kBackend] = compile::CreateBackend();
-    p_actions = PServerPipeline();
-  } else if (parallel::ps::Util::IsRoleOfScheduler()) {
-    p_actions = PSchedulerPipeline();
-  } else if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#else
-  if (use_vm && backend != "ge") {
-    // Create backend and session
-    auto backend_ptr = compile::CreateBackend();
-    // Connect session to debugger
-    backend_ptr->SetDebugger();
-    resource->results()[kBackend] = backend_ptr;
-    p_actions = VmPipeline();
-  } else {
-    p_actions = GePipeline();
-  }
-#endif
 
+  auto p_actions = GetPipline(resource, phase_s, use_vm);
   std::shared_ptr<Pipeline> pip = std::make_shared<Pipeline>(resource, FilterActions(p_actions, phase_s));
 
   // get the parameters items and add the value to args_spec
@@ -490,8 +479,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
 }
 
 std::vector<ActionItem> ExecutorPy::FilterActions(const std::vector<ActionItem> &actions, const std::string &phase) {
-  // phase does not contain 'export_onnx'
-  if (GetPhasePrefix(phase).find("export_onnx") == std::string::npos) {
+  // filter action after validate when 'export'.
+  if (GetPhasePrefix(phase).rfind("export", 0) == std::string::npos) {
     return actions;
   }
   MS_LOG(INFO) << "Phase is '" << phase << "', filter out actions after stage 'validate'";
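The `FilterActions` change generalizes the old `export_onnx` substring test: with the dotted phase names introduced below, every export phase shares the prefix "export", so one check covers GEIR, ONNX, and BINARY exports. A hedged Python sketch of the new rule (function name illustrative):

# Mirrors GetPhasePrefix(phase).rfind("export", 0) != npos in the C++ above:
# any phase prefix starting with "export" drops actions after 'validate'.
def filters_after_validate(phase_prefix: str) -> bool:
    return phase_prefix.startswith("export")

assert filters_after_validate("export")   # export.geir / export.onnx / export.binary
assert not filters_after_validate("train")
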
@@ -101,7 +101,6 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  private:
   ExecutorPy();
   void ConvertObjectToTensors(const py::dict &dict, std::map<std::string, tensor::TensorPtr> *tensors);
-  bool ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const;
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
   // 'validate' stage
@@ -205,8 +205,8 @@ const char kNameL2Loss[] = "L2Loss";
 const char kNameCTCLoss[] = "CTCLoss";
 const char kNameRange[] = "Range";
 const char kNameSquareSumAll[] = "SquareSumAll";
-const char kNameAscendQuant[] = "AscendQuant";
-const char kNameAscendDequant[] = "AscendDequant";
+const char kNameAscendQuant[] = "Quant";
+const char kNameAscendDequant[] = "Dequant";
 const char kNameCase[] = "Case";
 
 // -----------------OpAdapter initialization--------------
@@ -1107,7 +1107,7 @@ class QuantBlock(Cell):
     r"""
     A quant block of Conv/Dense, activation layer for Ascend deploy.
 
-    Calculate Conv or Dense in Int8, with AscendQuant and AscendDeQuant.
+    Calculate Conv or Dense in Int8, with Quant and DeQuant.
 
     Notes:
         This block is only for deploy, and not trainable.
@@ -160,7 +160,7 @@ class Range(PrimitiveWithInfer):
         return x_dtype
 
 
-class AscendQuant(PrimitiveWithInfer):
+class Quant(PrimitiveWithInfer):
     r"""
     Returns the quantized value of input_x.
 
@@ -192,7 +192,7 @@ class AscendQuant(PrimitiveWithInfer):
 
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> quant = P.AscendQuant(80.0, 0.0, False, "Round")
+        >>> quant = P.Quant(80.0, 0.0, False, "Round")
         >>> y = quant(input_x)
     """
 
@@ -213,7 +213,7 @@ class AscendQuant(PrimitiveWithInfer):
         return mstype.int8
 
 
-class AscendDequant(PrimitiveWithInfer):
+class Dequant(PrimitiveWithInfer):
     r"""
     Returns the dequantized value of input_x.
     This operation will do ReLU to the dequantized value if `relu_flag` is True.
@@ -245,7 +245,7 @@ class AscendDequant(PrimitiveWithInfer):
 
     Examples:
         >>> input_x = Tensor([100.0, 150.0], mstype.float32)
-        >>> dequant = P.AscendDequant(False, False)
+        >>> dequant = P.Dequant(False, False)
         >>> y = dequant(input_x)
     """
     @prim_attr_register
|
@ -329,14 +329,14 @@ class ExportToQuantInferNetwork:
|
|||
return None
|
||||
|
||||
# Build the `Quant` `Dequant` op.
|
||||
# AscendQuant only support perlayer version. Need check here.
|
||||
quant_op = inner.AscendQuant(float(scale_a_in), float(zp_a_in))
|
||||
# Quant only support perlayer version. Need check here.
|
||||
quant_op = inner.Quant(float(scale_a_in), float(zp_a_in))
|
||||
sqrt_mode = False
|
||||
scale_deq = scale_a_out * scale_w
|
||||
if (scale_deq < 2 ** -14).all():
|
||||
scale_deq = np.sqrt(scale_deq)
|
||||
sqrt_mode = True
|
||||
dequant_op = inner.AscendDequant(sqrt_mode)
|
||||
dequant_op = inner.Dequant(sqrt_mode)
|
||||
|
||||
# get op
|
||||
op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv
|
||||
|
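The `sqrt_mode` branch above guards against dequant scales that underflow on device: when every element of `scale_deq` drops below 2**-14 (plausibly because that is the smallest normal float16 value, though the diff does not say so), the square root is stored instead and `Dequant` is built with `sqrt_mode` so it can square the scale back. A small numeric sketch with made-up scales:

import numpy as np

scale_deq = np.array([1e-6], dtype=np.float32)  # stand-in for scale_a_out * scale_w
sqrt_mode = False
if (scale_deq < 2 ** -14).all():                # 2**-14 ~= 6.1e-5
    scale_deq = np.sqrt(scale_deq)
    sqrt_mode = True
print(sqrt_mode, scale_deq)                     # True [0.001]
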
@@ -411,11 +411,15 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='GEIR'):
         file_name (str): File name of model to export.
         mean (int): Input data mean. Default: 127.5.
         std_dev (int, float): Input data variance. Default: 127.5.
-        file_format (str): MindSpore currently supports 'GEIR' format for exported quantization aware model.
-            - GEIR: Graph Engine Intermediate Representation. An Intermediate representation format of Ascend model.
+        file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported
+            quantization aware model. Default: 'GEIR'.
+
+            - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of
+              Ascend model.
+            - BINARY: Binary format for model. An intermediate representation format for models.
     """
     supported_device = ["Ascend"]
-    supported_formats = ['GEIR']
+    supported_formats = ['GEIR', 'BINARY']
 
     mean = validator.check_type("mean", mean, (int, float))
     std_dev = validator.check_type("std_dev", std_dev, (int, float))
@@ -428,7 +432,6 @@ def export(network, *inputs, file_name, mean=127.5, std_dev=127.5, file_format='GEIR'):
 
     network.set_train(False)
-
     if file_format == 'GEIR':
         exporter = ExportToQuantInferNetwork(network, mean, std_dev, *inputs)
         deploy_net = exporter.run()
         serialization.export(deploy_net, *inputs, file_name=file_name, file_format=file_format)
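With 'BINARY' added to `supported_formats`, a quantization-aware network can now be exported in the binary IR format. A hedged usage sketch; the import path is taken from the tests at the end of this diff, and `make_quant_network` is a hypothetical builder standing in for a real quant-aware model:

import numpy as np
from mindspore import Tensor
from mindspore.train.quant import quant as qat  # import path assumed from the tests

network = make_quant_network()  # hypothetical: any network from convert_quant_network
img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
qat.export(network, img, file_name="quant.pb", file_format='BINARY')
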
@@ -104,7 +104,7 @@ def weight2int(data, scale, zero_point):
         raise ValueError("`scale` and `zero_point` should have the same shape.")
     if scale.shape[0] < 0:
         raise ValueError("`scale` and `zero_point` shape should greater than zero.")
-    if len(scale.shape) > 1:
+    if len(scale.shape) >= 1 and scale.shape[0] > 1:
         # for perchannel
         if scale.shape[0] == data.shape[0]:
             # `Conv2d` or `Dense` op weight
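This fix matters because per-channel quantization typically stores `scale` as a 1-D array with one entry per channel, which the old `len(scale.shape) > 1` test misclassified as per-layer. A hedged sketch of the corrected predicate (helper name illustrative):

import numpy as np

def is_per_channel(scale: np.ndarray) -> bool:
    # corrected test from the diff: more than one leading entry means per-channel
    return len(scale.shape) >= 1 and scale.shape[0] > 1

assert is_per_channel(np.array([0.1, 0.2, 0.3]))  # 1-D per-channel scale
assert not is_per_channel(np.array([0.1]))        # single-entry per-layer scale
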
@@ -454,19 +454,20 @@ def export(net, *inputs, file_name, file_format='GEIR'):
     # export model
     net.init_parameters_data()
     if file_format == 'GEIR':
-        _executor.compile(net, *inputs, phase='export')
+        phase_name = 'export.geir'
+        _executor.compile(net, *inputs, phase=phase_name)
         _executor.export(net, file_name, file_format)
     elif file_format == 'ONNX':  # file_format is 'ONNX'
         # NOTICE: the phase name `export_onnx` is used for judging whether onnx is being exported in the compile
         # pipeline, do not change it to other values.
-        phase_name = 'export_onnx'
+        phase_name = 'export.onnx'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id)
         with open(file_name, 'wb') as f:
             os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
             f.write(onnx_stream)
     elif file_format == 'BINARY':  # file_format is 'BINARY'
-        phase_name = 'export_binary'
+        phase_name = 'export.binary'
         graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id, 'binary_ir')
         with open(file_name, 'wb') as f:
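The renames replace underscore phase names with dotted ones so that every export phase shares the literal prefix "export", which is exactly what `IsPhaseExportGeir` and the updated `FilterActions` key on. A small illustration:

phase_names = {'GEIR': 'export.geir', 'ONNX': 'export.onnx', 'BINARY': 'export.binary'}
assert all(name.startswith('export') for name in phase_names.values())
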
@@ -2180,36 +2180,36 @@ test_case_other_ops = [
 ]
 
 test_case_quant_ops = [
-    ('AscendQuant_1', {
-        'block': inner.AscendQuant(0.5, 0.0, False, "Round"),
+    ('Quant_1', {
+        'block': inner.Quant(0.5, 0.0, False, "Round"),
         'desc_inputs': [Tensor(np.random.rand(1, 2, 4, 4), mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_2', {
-        'block': inner.AscendQuant(80.0, 10.0, True, "Round"),
+    ('Quant_2', {
+        'block': inner.Quant(80.0, 10.0, True, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_3', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Floor"),
+    ('Quant_3', {
+        'block': inner.Quant(80.0, 0.0, False, "Floor"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_4', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Ceil"),
+    ('Quant_4', {
+        'block': inner.Quant(80.0, 0.0, False, "Ceil"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_5', {
-        'block': inner.AscendQuant(80.0, 0.0, False, "Trunc"),
+    ('Quant_5', {
+        'block': inner.Quant(80.0, 0.0, False, "Trunc"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_6', {
-        'block': inner.AscendQuant(-80.0, 10.0, False, "Round"),
+    ('Quant_6', {
+        'block': inner.Quant(-80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_7', {
-        'block': inner.AscendQuant(80.0, -10.0, False, "Round"),
+    ('Quant_7', {
+        'block': inner.Quant(80.0, -10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float32)],
         'skip': ['backward']}),
-    ('AscendQuant_8', {
-        'block': inner.AscendQuant(80.0, 10.0, False, "Round"),
+    ('Quant_8', {
+        'block': inner.Quant(80.0, 10.0, False, "Round"),
         'desc_inputs': [Tensor([100.0, 200.0], mstype.float16)],
         'skip': ['backward']}),
 ]
@@ -75,10 +75,20 @@ def test_qat_lenet():
 
 
 @pytest.mark.skip(reason="no `te.lang.cce` in ut env")
-def test_qat_mobile():
+def test_qat_mobile_per_channel_tf():
     network = mobilenetV2(num_classes=1000)
     img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
-    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, True], symmetric=[True, False])
     # should load the checkpoint. mock here
     for param in network.get_parameters():
         param.init_data()
+    qat.export(network, img, file_name="quant.pb")
+
+
+@pytest.mark.skip(reason="no `te.lang.cce` in ut env")
+def test_qat_mobile_per_channel_ff():
+    network = mobilenetV2(num_classes=1000)
+    img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32))
+    network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False])
+    # should load the checkpoint. mock here
+    for param in network.get_parameters():
+        param.init_data()