From ea0b841653ffa573b8171aab9b765610b9b4ff07 Mon Sep 17 00:00:00 2001
From: hangangqiang
Date: Wed, 21 Dec 2022 10:35:59 +0800
Subject: [PATCH] remove compression and testcases

---
 cmake/package.cmake                           |    1 -
 cmake/package_mac.cmake                       |    1 -
 cmake/package_win.cmake                       |    1 -
 .../api_python/mindspore/mindspore.export.rst |    3 -
 mindspore/ccsrc/pipeline/jit/init.cc          |    2 -
 mindspore/ccsrc/pipeline/jit/pipeline.cc      |  116 -
 mindspore/ccsrc/pipeline/jit/pipeline.h       |    5 -
 .../optimizer/ascend_backend_optimization.cc  |    3 -
 ...e_learned_scale_quant_grad_unify_mindir.cc |  233 --
 ...ke_learned_scale_quant_grad_unify_mindir.h |   72 -
 .../ccsrc/transform/graph_ir/op_adapter_map.h |    4 -
 .../elewise_calculation_ops_declare.cc        |   35 -
 .../elewise_calculation_ops_declare.h         |   12 -
 mindspore/python/mindspore/common/api.py      |    6 -
 mindspore/python/mindspore/compression/OWNERS |    4 -
 .../python/mindspore/compression/__init__.py  |   19 -
 .../mindspore/compression/common/__init__.py  |   24 -
 .../mindspore/compression/common/constant.py  |  124 --
 .../mindspore/compression/export/__init__.py  |   19 -
 .../compression/export/quant_export.py        |  515 -----
 .../mindspore/compression/quant/__init__.py   |   28 -
 .../python/mindspore/compression/quant/qat.py |  634 ------
 .../compression/quant/quant_utils.py          |  462 ----
 .../mindspore/compression/quant/quantizer.py  |   68 -
 .../python/mindspore/nn/layer/__init__.py     |    4 +-
 mindspore/python/mindspore/nn/layer/quant.py  | 1868 -----------------
 .../python/mindspore/train/serialization.py   |   65 -
 tests/st/quantization/lenet_quant/config.py   |   31 -
 tests/st/quantization/lenet_quant/dataset.py  |   60 -
 .../quantization/lenet_quant/lenet_fusion.py  |   58 -
 .../lenet_quant/test_lenet_quant.py           |  199 --
 .../quantization/mobilenetv2_quant/dataset.py |   67 -
 .../mobilenetv2_quant/lr_generator.py         |   56 -
 .../mobilenetv2_quant/mobilenetV2.py          |  263 ---
 .../test_mobilenetv2_quant.py                 |  136 --
 .../test_mobilenetv2_quant_gpu.py             |  121 --
 .../quantization/mobilenetv2_quant/utils.py   |  120 --
 .../ops/test_Conv2dBnFoldQuant.py             |   12 +-
 .../st/quantization/resnet50_quant/dataset.py |   67 -
 .../resnet50_quant/lr_generator.py            |   93 -
 .../resnet50_quant/resnet_quant_manual.py     |  346 ---
 .../resnet50_quant/test_resnet50_quant.py     |  131 --
 tests/st/quantization/resnet50_quant/utils.py |  105 -
 43 files changed, 7 insertions(+), 6186 deletions(-)
 delete mode 100644 mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc
 delete mode 100644 mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h
 delete mode 100644 mindspore/python/mindspore/compression/OWNERS
 delete mode 100644 mindspore/python/mindspore/compression/__init__.py
 delete mode 100644 mindspore/python/mindspore/compression/common/__init__.py
 delete mode 100644 mindspore/python/mindspore/compression/common/constant.py
 delete mode 100644 mindspore/python/mindspore/compression/export/__init__.py
 delete mode 100644 mindspore/python/mindspore/compression/export/quant_export.py
 delete mode 100644 mindspore/python/mindspore/compression/quant/__init__.py
 delete mode 100644 mindspore/python/mindspore/compression/quant/qat.py
 delete mode 100644 mindspore/python/mindspore/compression/quant/quant_utils.py
 delete mode 100644 mindspore/python/mindspore/compression/quant/quantizer.py
 delete mode 100644 mindspore/python/mindspore/nn/layer/quant.py
 delete mode 100644 tests/st/quantization/lenet_quant/config.py
 delete mode 100644 tests/st/quantization/lenet_quant/dataset.py
 delete mode 100644 tests/st/quantization/lenet_quant/lenet_fusion.py
 delete mode 100644 tests/st/quantization/lenet_quant/test_lenet_quant.py
 delete mode 100644 tests/st/quantization/mobilenetv2_quant/dataset.py
 delete mode 100644 tests/st/quantization/mobilenetv2_quant/lr_generator.py
 delete mode 100644 tests/st/quantization/mobilenetv2_quant/mobilenetV2.py
 delete mode 100644 tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py
 delete mode 100644 tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py
 delete mode 100644 tests/st/quantization/mobilenetv2_quant/utils.py
 delete mode 100755 tests/st/quantization/resnet50_quant/dataset.py
 delete mode 100755 tests/st/quantization/resnet50_quant/lr_generator.py
 delete mode 100644 tests/st/quantization/resnet50_quant/resnet_quant_manual.py
 delete mode 100755 tests/st/quantization/resnet50_quant/test_resnet50_quant.py
 delete mode 100644 tests/st/quantization/resnet50_quant/utils.py

diff --git a/cmake/package.cmake b/cmake/package.cmake
index 84fc18b9246..27b140771d2 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -289,7 +289,6 @@ install(
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
-${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
diff --git a/cmake/package_mac.cmake b/cmake/package_mac.cmake
index 7c5550b45ff..2effa7579a3 100644
--- a/cmake/package_mac.cmake
+++ b/cmake/package_mac.cmake
@@ -164,7 +164,6 @@ install(
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
-${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
diff --git a/cmake/package_win.cmake b/cmake/package_win.cmake
index b71bb2372d7..7874a35f69f 100644
--- a/cmake/package_win.cmake
+++ b/cmake/package_win.cmake
@@ -250,7 +250,6 @@ install(
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
-${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
 ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
diff --git a/docs/api/api_python/mindspore/mindspore.export.rst b/docs/api/api_python/mindspore/mindspore.export.rst
index 6eeeecf4d80..ac9da4a3783 100644
--- a/docs/api/api_python/mindspore/mindspore.export.rst
+++ b/docs/api/api_python/mindspore/mindspore.export.rst
@@ -23,9 +23,6 @@ mindspore.export
 - **kwargs** (dict) - 配置选项字典。
- - **quant_mode** (str) - 如果网络是量化感知训练网络,那么 `quant_mode` 需要设置为"QUANT",否则 `quant_mode` 需要设置为"NONQUANT"。
- - **mean** (float) - 预处理后输入数据的平均值,用于量化网络的第一层。默认值:127.5。
- - **std_dev** (float) - 预处理后输入数据的方差,用于量化网络的第一层。默认值:127.5。
 - **enc_key** (str) - 用于加密的字节类型密钥,有效长度为16、24或者32。
 - **enc_mode** (Union[str, function]) - 指定加密模式,当设置 `enc_key` 时启用。
diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc
index a5e7b6cae80..b1dcdce0572 100644
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++
b/mindspore/ccsrc/pipeline/jit/init.cc @@ -168,8 +168,6 @@ PYBIND11_MODULE(_c_expression, m) { "Get the number of parallel operators.") .def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"), "Get Allreduce Fusion Dictionary.") - .def("fetch_info_for_quant_export", &GraphExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"), - "Fetch the inputs of Conv or Matmul for quant export.") .def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"), "Build data graph.") .def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"), diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc index 8eb18f12704..b6f1e5f1e1a 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -631,122 +631,6 @@ GraphExecutorPy::~GraphExecutorPy() { ConfigManager::GetInstance().ResetConfig(); } -void GraphExecutorPy::GetWeightInfo( - const CNodePtr &root_node, const AnfNodePtr &weight_node, - std::map> *fake_quant_table) const { - MS_EXCEPTION_IF_NULL(root_node); - MS_EXCEPTION_IF_NULL(fake_quant_table); - std::string weight_name; - auto x = root_node->input(1); - MS_EXCEPTION_IF_NULL(x); - if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) { - weight_name = weight_node->cast_ptr()->input(1)->cast_ptr()->name(); - } else { - auto para = weight_node->cast_ptr(); - MS_EXCEPTION_IF_NULL(para); - weight_name = para->name(); - } - // find the fakequant from input - int64_t count = 0; - const int64_t max_depth = 5; - auto is_quant_cnode = [](const AnfNodePtr &node) { - return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) || - IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) || - IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) || - IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel); - }; - while (!is_quant_cnode(x)) { - if (count >= max_depth) { - break; - } - auto cnode = x->cast_ptr(); - if (cnode == nullptr || cnode->size() <= 1) { - break; - } - x = cnode->input(1); - count += 1; - } - if (x->isa() || IsPrimitiveCNode(x, prim::kPrimLoad)) { - (*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input"); - } - // get the fakequant parameter minq's name - if (!is_quant_cnode(x)) { - return; - } - auto cnode = x->cast_ptr(); - constexpr size_t expect_input_size = 4; - if (cnode == nullptr || cnode->IsApply(prim::kPrimLoad) || cnode->size() != expect_input_size) { - return; - } - const size_t fakequant_index = 2; - auto fakequant_min_node = cnode->input(fakequant_index); - if (!fakequant_min_node->isa() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) { - return; - } - std::string fakequant_min_node_name; - if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) { - fakequant_min_node_name = fakequant_min_node->cast_ptr()->input(1)->cast_ptr()->name(); - } else { - auto param = fakequant_min_node->cast_ptr(); - MS_EXCEPTION_IF_NULL(param); - fakequant_min_node_name = param->name(); - } - auto quant_op = GetValuePtr(cnode->input(0)); - if (quant_op == nullptr) { - return; - } - (*fake_quant_table)[weight_name] = std::make_pair(quant_op->adapter(), fakequant_min_node_name); -} - -std::map> GraphExecutorPy::FetchInfoForQuantExport( - const std::string &phase) { - FuncGraphPtr func_graph = info_[phase]->resource->func_graph(); - MS_EXCEPTION_IF_NULL(func_graph); - MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << 
func_graph->ToString() << ") phase(" << phase << ")!"; - std::map> fake_quant_table; - auto filter = [](const AnfNodePtr &node) { - return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) || - IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative)); - }; - std::vector nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter); - auto is_quant_cnode = [](const AnfNodePtr &node) { - return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) || - IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) || - IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) || - IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel); - }; - const size_t root_node_size = 3; - const size_t weight_index = 2; - for (const auto &node : nodes) { - auto root_node = node->cast(); - if (root_node == nullptr || root_node->size() != root_node_size) { - continue; - } - auto weight = root_node->input(weight_index); - if (!is_quant_cnode(weight)) { - auto tuple_node = weight->cast_ptr(); - if (tuple_node != nullptr) { - auto fake_node = tuple_node->input(1); - if (!is_quant_cnode(fake_node)) { - continue; - } else { - weight = fake_node; - } - } - } - // get parameter weight's name - auto cnode = weight->cast_ptr(); - MS_EXCEPTION_IF_NULL(cnode); - auto weight_node = cnode->input(weight_index); - MS_EXCEPTION_IF_NULL(weight_node); - if (!weight_node->isa() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) { - continue; - } - GetWeightInfo(root_node, weight_node, &fake_quant_table); - } - return fake_quant_table; -} - void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) { // save the graph to GraphExecutorPy FuncGraphPtr func_graph = info_[phase]->resource->func_graph(); diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h index b50e89d8e88..cd7ab4d3c63 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline.h @@ -130,9 +130,6 @@ class GraphExecutorPy : public std::enable_shared_from_this { void TerminateDebugger(); #endif - std::map> FetchInfoForQuantExport( - const std::string &phase); - // Generate a key for mapping function graph py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false); @@ -140,8 +137,6 @@ class GraphExecutorPy : public std::enable_shared_from_this { private: GraphExecutorPy() = default; - void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node, - std::map> *fake_quant_table) const; void ParallelPostProcess(const string &phase); void GetGeBackendPolicy() const; // filter some pipeline actions according to phase, e.g. 
when exporting onnx, it is no need to execute actions after diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc index f56c794f437..3d424d9f2f9 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc @@ -161,7 +161,6 @@ #include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h" #include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h" -#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h" @@ -667,8 +666,6 @@ void AscendUnifyMindIR(const std::shared_ptr &kernel_graph unify_mindir_pm->AddPass(std::make_shared()); unify_mindir_pm->AddPass(std::make_shared()); unify_mindir_pm->AddPass(std::make_shared()); - unify_mindir_pm->AddPass(std::make_shared()); - unify_mindir_pm->AddPass(std::make_shared()); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kGraphMode) { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc deleted file mode 100644 index 1f41d68aa26..00000000000 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc +++ /dev/null @@ -1,233 +0,0 @@ -/** - * Copyright 2022 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h" - -#include -#include - -#include "include/common/utils/utils.h" -#include "utils/ms_context.h" -#include "backend/common/optimizer/helper.h" -#include "runtime/device/kernel_info.h" -#include "backend/common/session/anf_runtime_algorithm.h" -#include "include/common/utils/anfalgo.h" -#include "utils/trace_base.h" - -namespace mindspore { -namespace opt { -void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerGradD( - const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - std::vector *const lsq_perlayer_grad_d_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node); - const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs(); - if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size() - << trace::DumpSourceLines(lsq_perlayer_grad_node); - } - std::vector lsq_perlayer_grad_d_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerLayerGradDOpName)), - lsq_perlayer_grad_inputs[kIndex1], lsq_perlayer_grad_inputs[kIndex2], lsq_perlayer_grad_inputs[kIndex3], - lsq_perlayer_grad_inputs[kIndex4]}; - auto lsq_perlayer_grad_d = NewCNode(lsq_perlayer_grad_d_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d); - lsq_perlayer_grad_d->set_scope(lsq_perlayer_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL), - common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL), - common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_grad_d.get()); - - common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perlayer_grad_node, lsq_perlayer_grad_d); - CreateMultipleOutputsOfAnfNode(graph, lsq_perlayer_grad_d, kFakeLearnedScaleQuantGradDOutputNum, - lsq_perlayer_grad_d_outputs); -} - -void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerReduceGrad( - const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - const std::vector &lsq_perlayer_grad_d_outputs, - std::vector *const lsq_perlayer_reduce_grad_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node); - MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs); - const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs(); - if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size() - << trace::DumpSourceLines(lsq_perlayer_grad_node); - } - if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) { - MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size() - << trace::DumpSourceLines(lsq_perlayer_grad_node); - } - std::vector lsq_perlayer_reduce_grad_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerLayerGradDReduceOpName)), - lsq_perlayer_grad_d_outputs[kIndex1]}; - auto 
lsq_perlayer_reduce_grad = NewCNode(lsq_perlayer_reduce_grad_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad); - lsq_perlayer_reduce_grad->set_scope(lsq_perlayer_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 1UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 1UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_reduce_grad.get()); - - (*lsq_perlayer_reduce_grad_outputs).push_back(lsq_perlayer_reduce_grad); -} - -void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelGradD( - const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - std::vector *const lsq_perchannel_grad_d_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node); - const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs(); - if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size() - << trace::DumpSourceLines(lsq_perchannel_grad_node); - } - std::vector lsq_perchannel_grad_d_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerChannelGradDOpName)), - lsq_perchannel_grad_inputs[kIndex1], lsq_perchannel_grad_inputs[kIndex2], lsq_perchannel_grad_inputs[kIndex3], - lsq_perchannel_grad_inputs[kIndex4]}; - auto lsq_perchannel_grad_d = NewCNode(lsq_perchannel_grad_d_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d); - lsq_perchannel_grad_d->set_scope(lsq_perchannel_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL), - common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL), - common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_grad_d.get()); - - common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perchannel_grad_node, lsq_perchannel_grad_d); - common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_grad_d); - CreateMultipleOutputsOfAnfNode(graph, lsq_perchannel_grad_d, kFakeLearnedScaleQuantGradDOutputNum, - lsq_perchannel_grad_d_outputs); -} - -void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelReduceGrad( - const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - const std::vector &lsq_perchannel_grad_d_outputs, - std::vector *const lsq_perchannel_reduce_grad_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node); - MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs); - const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs(); - if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size() - << trace::DumpSourceLines(lsq_perchannel_grad_node); - } - if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) { - MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradDOutputNum << ", but 
got " << lsq_perchannel_grad_inputs.size() - << trace::DumpSourceLines(lsq_perchannel_grad_node); - } - std::vector lsq_perchannel_reduce_grad_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerChannelGradDReduceOpName)), - lsq_perchannel_grad_d_outputs[kIndex1]}; - auto lsq_perchannel_reduce_grad = NewCNode(lsq_perchannel_reduce_grad_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad); - lsq_perchannel_reduce_grad->set_scope(lsq_perchannel_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 1UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 1UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_reduce_grad.get()); - common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_reduce_grad); - (*lsq_perchannel_reduce_grad_outputs).push_back(lsq_perchannel_reduce_grad); -} - -const BaseRef FakeLearnedScaleQuantPerLayerGradUnifyMindIR::DefinePattern() const { - VarPtr Xs = std::make_shared(); - auto prim = std::make_shared(kFakeLearnedScaleQuantPerLayerGradOpName); - return VectorRef({prim, Xs}); -} - -const AnfNodePtr FakeLearnedScaleQuantPerLayerGradUnifyMindIR::Process(const FuncGraphPtr &func_graph, - const AnfNodePtr &node, const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(func_graph); - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(primitive); - - std::vector lsq_perlayer_grad_d_outputs; - CreateOutputsOfLSQPerLayerGradD(func_graph, cnode, &lsq_perlayer_grad_d_outputs); - if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector lsq_perlayer_reduce_grad_outputs; - CreateOutputsOfLSQPerLayerReduceGrad(func_graph, cnode, lsq_perlayer_grad_d_outputs, - &lsq_perlayer_reduce_grad_outputs); - if (lsq_perlayer_reduce_grad_outputs.size() != kSingleOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_reduce_grad_outputs has wrong inputs size, should be " - << kSingleOutputNum << ", but got " << lsq_perlayer_reduce_grad_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perlayer_grad_d_outputs[0], - lsq_perlayer_reduce_grad_outputs[0]}; - auto make_tuple = func_graph->NewCNode(make_tuple_inputs); - return make_tuple; -} - -const BaseRef FakeLearnedScaleQuantPerChannelGradUnifyMindIR::DefinePattern() const { - VarPtr Xs = std::make_shared(); - auto prim = std::make_shared(kFakeLearnedScaleQuantPerChannelGradOpName); - return VectorRef({prim, Xs}); -} - -const AnfNodePtr FakeLearnedScaleQuantPerChannelGradUnifyMindIR::Process(const FuncGraphPtr &func_graph, - const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(func_graph); - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(primitive); - - std::vector lsq_perchannel_grad_d_outputs; - CreateOutputsOfLSQPerChannelGradD(func_graph, cnode, &lsq_perchannel_grad_d_outputs); - if (lsq_perchannel_grad_d_outputs.size() != 
kFakeLearnedScaleQuantGradOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perchannel_grad_d_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector lsq_perchannel_reduce_grad_outputs; - CreateOutputsOfLSQPerChannelReduceGrad(func_graph, cnode, lsq_perchannel_grad_d_outputs, - &lsq_perchannel_reduce_grad_outputs); - if (lsq_perchannel_reduce_grad_outputs.size() != kSingleOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_reduce_grad_outputs has wrong inputs size, should be " - << kSingleOutputNum << ", but got " << lsq_perchannel_reduce_grad_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perchannel_grad_d_outputs[0], - lsq_perchannel_reduce_grad_outputs[0]}; - auto make_tuple = func_graph->NewCNode(make_tuple_inputs); - return make_tuple; -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h deleted file mode 100644 index 79d05ef7717..00000000000 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright 2022 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_ -#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_ - -#include -#include "backend/common/optimizer/optimizer.h" -#include "backend/common/optimizer/helper.h" - -namespace mindspore { -namespace opt { -constexpr size_t kFakeLearnedScaleQuantGradOutputNum = 2; -constexpr size_t kFakeLearnedScaleQuantGradInputNum = 5; -constexpr size_t kFakeLearnedScaleQuantGradDOutputNum = 2; -constexpr auto kFakeLearnedScaleQuantPerLayerGradOpName = "FakeLearnedScaleQuantPerLayerGrad"; -constexpr auto kFakeLearnedScaleQuantPerLayerGradDOpName = "FakeLearnedScaleQuantPerLayerGradD"; -constexpr auto kFakeLearnedScaleQuantPerLayerGradDReduceOpName = "FakeLearnedScaleQuantPerLayerGradDReduce"; -constexpr auto kFakeLearnedScaleQuantPerChannelGradOpName = "FakeLearnedScaleQuantPerChannelGrad"; -constexpr auto kFakeLearnedScaleQuantPerChannelGradDOpName = "FakeLearnedScaleQuantPerChannelGradD"; -constexpr auto kFakeLearnedScaleQuantPerChannelGradDReduceOpName = "FakeLearnedScaleQuantPerChannelGradDReduce"; - -constexpr auto kAttrNeg_trunc = "neg_trunc"; -constexpr auto kAttrChannelAxis = "channel_axis"; - -class FakeLearnedScaleQuantPerLayerGradUnifyMindIR : public PatternProcessPass { - public: - explicit FakeLearnedScaleQuantPerLayerGradUnifyMindIR(bool multigraph = true) - : PatternProcessPass("fake_learned_scale_quant_perlayer_grad_unify_mindir", multigraph) {} - ~FakeLearnedScaleQuantPerLayerGradUnifyMindIR() override = default; - const BaseRef DefinePattern() const override; - const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; - - private: - void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - std::vector *const lsq_perlayer_grad_d_outputs) const; - void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - const std::vector &lsq_perlayer_grad_d_outputs, - std::vector *const lsq_perlayer_reduce_grad_outputs) const; -}; - -class FakeLearnedScaleQuantPerChannelGradUnifyMindIR : public PatternProcessPass { - public: - explicit FakeLearnedScaleQuantPerChannelGradUnifyMindIR(bool multigraph = true) - : PatternProcessPass("fake_learned_scale_quant_perchannel_grad_unify_mindir", multigraph) {} - ~FakeLearnedScaleQuantPerChannelGradUnifyMindIR() override = default; - const BaseRef DefinePattern() const override; - const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; - - private: - void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - std::vector *const lsq_perchannel_grad_d_outputs) const; - void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - const std::vector &lsq_perchannel_grad_d_outputs, - std::vector *const lsq_perchannel_reduce_grad_outputs) const; -}; - -} // namespace opt -} // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_ diff --git a/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h index 3b3ceb024dc..504e3fe77df 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h @@ -220,10 +220,6 @@ constexpr const char kNameXlogy[] = "Xlogy"; 
constexpr const char kNameReLUV2[] = "ReLUV2"; constexpr const char kNameAccumulateNV2[] = "AccumulateNV2"; constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad"; -constexpr const char kNameFakeQuantWithMinMaxVars[] = "FakeQuantWithMinMaxVars"; -constexpr const char kNameFakeQuantWithMinMaxVarsGradient[] = "FakeQuantWithMinMaxVarsGradient"; -constexpr const char kNameFakeQuantWithMinMaxVarsPerChannel[] = "FakeQuantWithMinMaxVarsPerChannel"; -constexpr const char kNameFakeQuantWithMinMaxVarsPerChannelGradient[] = "FakeQuantWithMinMaxVarsPerChannelGradient"; constexpr const char kNameActsULQ[] = "ActsULQ"; constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad"; constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad"; diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc index eaf99f85fb8..a3fddc286f5 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc @@ -56,41 +56,6 @@ ATTR_MAP(ConfusionMulGrad) = {{"axes", ATTR_DESC(axes, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVars) = {{0, OUTPUT_DESC(y)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVars, kNameFakeQuantWithMinMaxVars, ADPT_DESC(FakeQuantWithMinMaxVars)) - -// FakeQuantWithMinMaxVarsGradient -INPUT_MAP(FakeQuantWithMinMaxVarsGradient) = { - {1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}}; -ATTR_MAP(FakeQuantWithMinMaxVarsGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVarsGradient) = { - {0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVarsGradient, kNameFakeQuantWithMinMaxVarsGradient, - ADPT_DESC(FakeQuantWithMinMaxVarsGradient)) - -// FakeQuantWithMinMaxVarsPerChannel -INPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}}; -ATTR_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{0, OUTPUT_DESC(y)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel, kNameFakeQuantWithMinMaxVarsPerChannel, - ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel)) - -// FakeQuantWithMinMaxVarsPerChannelGradient -INPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = { - {1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}}; -ATTR_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = { - {0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient, kNameFakeQuantWithMinMaxVarsPerChannelGradient, - ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient)) - // GreaterEqual INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP; diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h 
b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h index c2bf528867b..a55452c2722 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h @@ -32,18 +32,6 @@ DECLARE_OP_USE_OUTPUT(AccumulateNV2) DECLARE_OP_ADAPTER(ConfusionMulGrad) DECLARE_OP_USE_OUTPUT(ConfusionMulGrad) -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVars) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVars) - -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsGradient) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsGradient) - -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannel) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannel) - -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannelGradient) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannelGradient) - DECLARE_OP_ADAPTER(GreaterEqual) DECLARE_OP_USE_OUTPUT(GreaterEqual) diff --git a/mindspore/python/mindspore/common/api.py b/mindspore/python/mindspore/common/api.py index d1e58a1a2b7..260f4010173 100644 --- a/mindspore/python/mindspore/common/api.py +++ b/mindspore/python/mindspore/common/api.py @@ -1487,12 +1487,6 @@ class _CellGraphExecutor: """ self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key) - def fetch_info_for_quant_export(self, exec_id): - """Get graph proto from pipeline.""" - if self._graph_executor.has_compiled(exec_id) is False: - return None - return self._graph_executor.fetch_info_for_quant_export(exec_id) - def ms_memory_recycle(): """ diff --git a/mindspore/python/mindspore/compression/OWNERS b/mindspore/python/mindspore/compression/OWNERS deleted file mode 100644 index b71327621c6..00000000000 --- a/mindspore/python/mindspore/compression/OWNERS +++ /dev/null @@ -1,4 +0,0 @@ -approvers: -- zhang_xue_tong -- jpc_chenjianping -- hangangqiang diff --git a/mindspore/python/mindspore/compression/__init__.py b/mindspore/python/mindspore/compression/__init__.py deleted file mode 100644 index d6d80fd6a10..00000000000 --- a/mindspore/python/mindspore/compression/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -MindSpore compression module. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" diff --git a/mindspore/python/mindspore/compression/common/__init__.py b/mindspore/python/mindspore/compression/common/__init__.py deleted file mode 100644 index e4f28e7823b..00000000000 --- a/mindspore/python/mindspore/compression/common/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Common module for various compression algorithms, now only including datatype definition for quantization. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" - -from __future__ import absolute_import -from mindspore.compression.common.constant import QuantDtype - -__all__ = ["QuantDtype"] diff --git a/mindspore/python/mindspore/compression/common/constant.py b/mindspore/python/mindspore/compression/common/constant.py deleted file mode 100644 index 1ab4bb7e903..00000000000 --- a/mindspore/python/mindspore/compression/common/constant.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Note: - Constant module for compression. This is interface that is subject to change or deletion. -""" -from __future__ import absolute_import - -import enum -import re -from types import DynamicClassAttribute - - -__all__ = ["QuantDtype"] - - -@enum.unique -class QuantDtype(enum.Enum): - """ - An enum for quant datatype, contains `INT2` ~ `INT8`, `UINT2` ~ `UINT8`. - """ - INT2 = "INT2" - INT3 = "INT3" - INT4 = "INT4" - INT5 = "INT5" - INT6 = "INT6" - INT7 = "INT7" - INT8 = "INT8" - - UINT2 = "UINT2" - UINT3 = "UINT3" - UINT4 = "UINT4" - UINT5 = "UINT5" - UINT6 = "UINT6" - UINT7 = "UINT7" - UINT8 = "UINT8" - - def __str__(self): - return f"{self.name}" - - @staticmethod - def is_signed(dtype): - """ - Get whether the quant datatype is signed. - - Args: - dtype (QuantDtype): quant datatype. - - Returns: - bool, whether the input quant datatype is signed. - - Examples: - >>> quant_dtype = QuantDtype.INT8 - >>> is_signed = QuantDtype.is_signed(quant_dtype) - """ - return dtype in [QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5, - QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8] - - @staticmethod - def switch_signed(dtype): - """ - Switch the signed state of the input quant datatype. - - Args: - dtype (QuantDtype): quant datatype. - - Returns: - QuantDtype, quant datatype with opposite signed state as the input. 
- - Examples: - >>> quant_dtype = QuantDtype.INT8 - >>> quant_dtype = QuantDtype.switch_signed(quant_dtype) - """ - type_map = { - QuantDtype.INT2: QuantDtype.UINT2, - QuantDtype.INT3: QuantDtype.UINT3, - QuantDtype.INT4: QuantDtype.UINT4, - QuantDtype.INT5: QuantDtype.UINT5, - QuantDtype.INT6: QuantDtype.UINT6, - QuantDtype.INT7: QuantDtype.UINT7, - QuantDtype.INT8: QuantDtype.UINT8, - QuantDtype.UINT2: QuantDtype.INT2, - QuantDtype.UINT3: QuantDtype.INT3, - QuantDtype.UINT4: QuantDtype.INT4, - QuantDtype.UINT5: QuantDtype.INT5, - QuantDtype.UINT6: QuantDtype.INT6, - QuantDtype.UINT7: QuantDtype.INT7, - QuantDtype.UINT8: QuantDtype.INT8 - } - return type_map.get(dtype) - - @DynamicClassAttribute - def _value(self): - """The value of the Enum member.""" - return int(re.search(r"(\d+)", self._value_).group(1)) - - @DynamicClassAttribute - def num_bits(self): - """ - Get the num bits of the QuantDtype member. - - Returns: - int, the num bits of the QuantDtype member. - - Examples: - >>> from mindspore.compression.common import QuantDtype - >>> quant_dtype = QuantDtype.INT8 - >>> num_bits = quant_dtype.num_bits - >>> print(num_bits) - 8 - """ - return self._value diff --git a/mindspore/python/mindspore/compression/export/__init__.py b/mindspore/python/mindspore/compression/export/__init__.py deleted file mode 100644 index 139f7d7daae..00000000000 --- a/mindspore/python/mindspore/compression/export/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Compression export module. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" diff --git a/mindspore/python/mindspore/compression/export/quant_export.py b/mindspore/python/mindspore/compression/export/quant_export.py deleted file mode 100644 index d01244e5828..00000000000 --- a/mindspore/python/mindspore/compression/export/quant_export.py +++ /dev/null @@ -1,515 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Note: - Export for quantization. This is interface that is subject to change or deletion. 
-""" - -from __future__ import absolute_import - -import copy - -import numpy as np - -from mindspore import log as logger -from mindspore import nn, ops -from mindspore._checkparam import Validator -from mindspore.common import Tensor -from mindspore.common import dtype as mstype -from mindspore.common.api import _cell_graph_executor as _executor -from mindspore.common.parameter import Parameter -from mindspore.nn import Cell -from mindspore.nn.layer import quant -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.ops.operations import _inner_ops as inner -from mindspore.compression.quant import quant_utils -from mindspore.compression.quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell - -__all__ = ["ExportToQuantInferNetwork"] - - -class QuantBlock(Cell): - r""" - A quant block of Conv/Dense, activation layer for Ascend deploy. - - Calculate Conv or Dense in Int8, with Quant and DeQuant. - - Notes: - This block is only for deploy, and not trainable. - - Args: - in_channels (int): The number of channels in the input space. - out_channels (int): The number of channels in the output space. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype - is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is - same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. - activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None. - batchnorm (bool): Specifies to used batchnorm or not. Default: None. - activation (string): Specifies activation type. The optional values are as following: - 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', - 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. - - Inputs: - - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. - - Outputs: - Tensor of shape :math:`(N, out\_channels)`. - """ - - def __init__(self, - core_op, - weight, - quant_op, - dequant_op, - dequant_scale, - bias=None, - activation=None): - super(QuantBlock, self).__init__() - self.core_op = core_op - self.weight = weight - self.quant = quant_op - self.dequant = dequant_op - self.dequant_scale = dequant_scale - self.bias = bias - self.has_bias = bias is not None - self.activation = activation - self.has_act = activation is not None - self.bias_add = P.BiasAdd() - self.sub = P.Sub() - self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset') - - def construct(self, x): - x = self.quant(x) - if self.has_bias: - weight = self.sub(self.weight, self.weight_offset) - x = self.core_op(x, weight) - x = self.bias_add(x, self.bias) - else: - x = self.core_op(x, self.weight) - x = self.dequant(x, self.dequant_scale) - x = F.cast(x, mstype.float32) - if self.has_act: - x = self.activation(x) - return x - - def extend_repr(self): - s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]' - if self.has_bias: - s += f', bias=shape[{self.bias.shape}]' - if self.has_act: - s += f', activation={self.activation}' - s += f', dequant={self.dequant}' - return s - - -class QuantMindirBlock(Cell): - """A quant binary block of Conv/Dense, activation layer for export MINDIR model. 
- - Args: - core_op (Cell): The operation cell. - weight (Tensor): The weight of the cell. - bias (Tensor): The bias of the cell. Default: None. - activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None. - param_dict (dict): The information of the cell. - """ - - def __init__(self, - core_op, - weight, - bias=None, - activation=None, - param_dict=None): - - super(QuantMindirBlock, self).__init__() - self.core_op = core_op - if activation is not None: - self.core_op.add_prim_attr("activation_name", activation.__class__.__name__) - self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"])) - self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"])) - if param_dict["output_maxq"] is not None: - self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"])) - self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"])) - self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"])) - if hasattr(core_op, 'pad_mode'): - self.core_op.add_prim_attr("pad_mode", core_op.pad_mode) - self.core_op.add_prim_attr("act_num_bits", Tensor(8)) - self.core_op.add_prim_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"])) - self.core_op.add_prim_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"])) - if param_dict["input_narrow_range"] is not None: - self.core_op.add_prim_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"])) - if param_dict["output_narrow_range"] is not None: - self.core_op.add_prim_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"])) - if param_dict["input_maxq"] == 'None': - self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"])) - self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"])) - elif param_dict["input_maxq"] is not None: - self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"])) - self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"])) - - self.weight = weight - self.bias = bias - self.has_bias = bias is not None - self.activation = activation - self.has_act = activation is not None - self.bias_add = P.BiasAdd() - - def construct(self, x): - if self.has_bias: - x = self.core_op(x, self.weight) - x = self.bias_add(x, self.bias) - else: - x = self.core_op(x, self.weight) - if self.has_act: - x = self.activation(x) - return x - - def extend_repr(self): - s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]' - if self.has_bias: - s += f', bias=shape[{self.bias.shape}]' - if self.has_act: - s += f', activation={self.activation}' - return s - - -class ExportToQuantInferNetwork: - """ - Convert quantization aware network to infer network. - - Args: - network (Cell): MindSpore quantization aware training network. - inputs (Tensor): Input tensors of the `quantization aware training network`. - mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of network. - Default: 127.5. - std_dev (int, float): The variance of input data after preprocessing, used for quantizing the first layer - of network. Default: 127.5. - is_mindir (bool): Whether export MINDIR format. Default: False. - - Returns: - Cell, Infer network. 
- """ - - def __init__(self, network, mean, std_dev, *inputs, is_mindir=False): - network = Validator.check_isinstance('network', network, (nn.Cell,)) - self.data_type = mstype.int8 - self.network = copy.deepcopy(network) - self.network_bk = copy.deepcopy(network) - self.get_inputs_table(inputs) - self.mean = mean - self.std_dev = std_dev - self.is_mindir = is_mindir - self.upcell = None - - @staticmethod - def __get_dequant_scale(scale_a_in, scale_w): - """Get dequant scale""" - scale_deq = scale_a_in * scale_w - - # fuse parameter - # |--------|47:40|--------|39:32|--------|31:0| - # offset_w [8] shift_N [8] deq_scale [32] - float32_deq_scale = scale_deq.astype(np.float32) - uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32) - scale_length = scale_deq.size # channel - dequant_param = np.zeros(scale_length, dtype=np.uint64) - for index in range(scale_length): - dequant_param[index] += uint32_deq_scale[index] - scale_deq = Tensor(dequant_param, mstype.uint64) - return scale_deq - - def get_inputs_table(self, inputs): - """Get the input quantization parameters of quantization cell for quant export.""" - phase_name = 'export_quant' - graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False) - self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id) - - def run(self): - """Start to convert.""" - logger.warning("The compression module is deprecated and may not be supported in later version, please use " - "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.") - self.network.update_cell_prefix() - network = self.network - if isinstance(network, _AddFakeQuantInput): - network = network.network - network = self._convert_quant2deploy(network) - return network - - def _get_quant_block(self, cell_core, activation, fake_quant_a_out): - """convert network's quant subcell to deploy subcell""" - scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out) - - # Build the `Quant` `Dequant` op. - # Quant only support perlayer version. Need check here. 
- if float(scale_a_in) == 0: - raise ValueError("If `scale_a_in` is zero, will lead to zero error.") - quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in)) - scale_deq = self.__get_dequant_scale(scale_a_in, scale_w) - dequant_op = inner.Dequant() - - if isinstance(activation, _AddFakeQuantAfterSubCell): - activation = activation.subcell - elif hasattr(activation, "get_origin"): - activation = activation.get_origin() - - # get op - if isinstance(cell_core, quant.DenseQuant): - op_core = P.MatMul() - else: - op_core = cell_core.conv - - # get the `weight` and `bias` - weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w) - - if self.is_mindir: - block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict) - else: - block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) - return block - - def _get_input_quant_param(self, minq_name, np_type, param_dict): - """get input quant parameter for quant block""" - fake_quant_a_in_prefix = minq_name[:-5] - cells = self.network_bk.cells_and_names() - for cell in cells: - if cell[0].endswith(fake_quant_a_in_prefix): - fake_quant_a_in = cell[1] - break - scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ - quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type) - param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range - return scale_a_in, zp_a_in - - def __get_quant_param(self, cell_core, fake_quant_a_out): - """get parameter for quant block""" - w_minq_name = cell_core.fake_quant_weight.minq.name - w_maxq_name = cell_core.fake_quant_weight.maxq.name - np_type = mstype.dtype_to_nptype(self.data_type) - param_dict = dict() - param_dict["filter_maxq"] = None - param_dict["filter_minq"] = None - param_dict["output_maxq"] = None - param_dict["output_minq"] = None - param_dict["input_maxq"] = None - param_dict["input_minq"] = None - param_dict["input_narrow_range"] = None - param_dict["output_narrow_range"] = None - param_dict["weight_narrow_range"] = cell_core.fake_quant_weight.narrow_range - param_dict["mean"] = self.mean - param_dict["std_dev"] = self.std_dev - param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric - param_dict["weight_num_bits"] = cell_core.fake_quant_weight.num_bits - - scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \ - quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type) - if fake_quant_a_out is not None: - _, _, param_dict["output_maxq"], param_dict["output_minq"] = \ - quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type) - param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range - - info = self.quant_info_table.get(w_minq_name, None) - if not info: - info = self.quant_info_table.get(w_maxq_name, None) - if info: - _, minq_name = info - if minq_name == 'input': - scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ - (1 / self.std_dev), round(self.mean), 'None', 'None' - else: - scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict) - else: - # skip quant layer - scale_a_in, zp_a_in = 1.0, 0.0 - return scale_a_in, zp_a_in, scale_w, zp_w, param_dict - - def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w): - """Get weight and bias for quantizaiton""" - np_type = mstype.dtype_to_nptype(self.data_type) - weight = cell_core.weight.data.asnumpy() - bias = None - if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)): - if 
cell_core.has_bias: - bias = cell_core.bias.data.asnumpy() - elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)): - weight, bias = quant_utils.fold_batchnorm(weight, cell_core) - elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant): - weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core) - weight_b = weight - bias_b = bias - # apply the quant - quant_min, quant_max = quant_utils.get_quant_min_max(np_type, - cell_core.fake_quant_weight.num_bits, - cell_core.fake_quant_weight.narrow_range) - weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max) - if bias is not None: - if 0 in scale_a_in: - raise ValueError("Zero exist in `scale_a_in` which will lead to divide zero error.") - if 0 in scale_w: - raise ValueError("Zero exist in `scale_w` which will lead to divide zero error.") - bias = Tensor(bias / scale_a_in / scale_w, mstype.int32) - - if isinstance(cell_core, quant.DenseQuant): - weight = np.transpose(weight) - weight_b = np.transpose(weight_b) - - weight_tensor = Tensor(weight, self.data_type) - weight_b_tensor = Tensor(weight_b) - if bias_b is not None: - bias_b_tensor = Tensor(bias_b, mstype.float32) - return weight_tensor, bias, weight_b_tensor, bias_b_tensor - return weight_tensor, bias, weight_b_tensor, None - - def _add_output_min_max_for_op(self, origin_op, fake_quant_cell): - """add output quant info for quant op for export mindir.""" - if self.is_mindir: - if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'): - np_type = mstype.dtype_to_nptype(self.data_type) - _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type) - origin_op.add_prim_attr('output_maxq', Tensor(maxq)) - origin_op.add_prim_attr('output_minq', Tensor(minq)) - - def _convert_subcell(self, network, change, name, subcell): - """Convert subcell to ant subcell.""" - if subcell is not None and hasattr(subcell, "fake_quant_weight"): - new_subcell = self._get_quant_block(subcell, None, None) - prefix = subcell.param_prefix - new_subcell.update_parameters_name(prefix + '.') - self.upcell = new_subcell - network.insert_child_to_cell(name, new_subcell) - change = True - return network, change - - def _convert_conv(self, network, change, name, subcell): - """Convert subcell to ant subcell for conv.""" - cell_core = subcell.conv - activation = subcell.activation - fake_quant_act = None - if hasattr(activation, 'fake_quant_act_before'): - fake_quant_act = activation.fake_quant_act_before - elif hasattr(activation, 'fake_quant_act'): - fake_quant_act = activation.fake_quant_act - if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): - new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) - self.upcell = None - prefix = subcell.param_prefix - new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - change = True - return network, change - - def _convert_dense(self, network, change, name, subcell): - """Convert subcell to ant subcell for dense.""" - cell_core = subcell.dense - activation = subcell.activation - fake_quant_act = None - if hasattr(activation, 'fake_quant_act_before'): - fake_quant_act = activation.fake_quant_act_before - elif hasattr(activation, 'fake_quant_act'): - fake_quant_act = activation.fake_quant_act - if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): - new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) - prefix = subcell.param_prefix - 
new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - self.upcell = None - change = True - return network, change - - def _convert_act(self, subcell): - """Convert subcell to ant subcell for activation.""" - activation = subcell.get_origin() - if isinstance(activation, nn.ReLU): - self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act) - elif isinstance(activation, nn.ReLU6): - self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act) - if self.upcell: - self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act) - return activation - - def _convert_add(self, subcell): - """Convert subcell to ant subcell for add.""" - if isinstance(subcell.add, _AddFakeQuantAfterSubCell): - add_op = subcell.add.subcell - subcell.__delattr__("add") - subcell.__setattr__("add", add_op) - add_op = subcell.add - self._add_output_min_max_for_op(add_op, subcell.fake_quant_act) - subcell.__delattr__("fake_quant_act") - subcell.__setattr__("fake_quant_act", P.identity()) - - def _convert_observer(self, network, name, subcell): - """Convert subcell to ant subcell for FakeQuantWithMinMaxObserver.""" - if self.upcell: - self._add_output_min_max_for_op(self.upcell.core_op, subcell) - network.__delattr__(name) - network.__setattr__(name, P.identity()) - - def _convert_fake_quant_after_cell(self, network, name, subcell): - """Convert subcell to ant subcell for _AddFakeQuantAfterSubCell.""" - op = subcell.subcell - self._add_output_min_max_for_op(op, subcell.fake_quant_act) - network.__delattr__(name) - network.__setattr__(name, op) - - def _convert_core_quant_subcell(self, network, change, name, subcell): - """Convert subcell to ant subcell for conv and dense.""" - is_core_subcell = True - if isinstance(subcell, nn.Conv2dBnAct): - network, change = self._convert_conv(network, change, name, subcell) - elif isinstance(subcell, nn.DenseBnAct): - network, change = self._convert_dense(network, change, name, subcell) - elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv, - quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)): - network, change = self._convert_subcell(network, change, name, subcell) - else: - is_core_subcell = False - return is_core_subcell, network, change - - def _convert_other_quant_subcell(self, network, change, name, subcell): - """Convert subcell to ant subcell for cell except conv and dense.""" - is_other_subcell = True - if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"): - activation = self._convert_act(subcell) - network.insert_child_to_cell(name, activation) - change = True - elif isinstance(subcell, nn.TensorAddQuant): - self._convert_add(subcell) - elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver): - self._convert_observer(network, name, subcell) - elif isinstance(subcell, _AddFakeQuantAfterSubCell): - self._convert_fake_quant_after_cell(network, name, subcell) - change = True - else: - is_other_subcell = False - return is_other_subcell, network, change - - def _convert_quant2deploy(self, network): - """Convert network's all quant subcell to deploy subcell.""" - cells = network.name_cells() - change = False - for name in cells: - subcell = cells[name] - if subcell == network: - continue - is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell) - is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell) - if not 
is_core_quant_subcell and not is_other_quant_subcell: - self.upcell = None - self._convert_quant2deploy(subcell) - if isinstance(network, nn.SequentialCell) and change: - network.cell_list = list(network.cells()) - return network diff --git a/mindspore/python/mindspore/compression/quant/__init__.py b/mindspore/python/mindspore/compression/quant/__init__.py deleted file mode 100644 index 7c098f17604..00000000000 --- a/mindspore/python/mindspore/compression/quant/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Quantization module, including base class of the quantizer, the quantization aware training algorithm, -and quantization utils. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" - -from __future__ import absolute_import -from .quantizer import OptimizeOption -from .qat import QuantizationAwareTraining, create_quant_config -from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers - -__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers", "QuantizationAwareTraining", - "create_quant_config", "OptimizeOption"] diff --git a/mindspore/python/mindspore/compression/quant/qat.py b/mindspore/python/mindspore/compression/quant/qat.py deleted file mode 100644 index a1826c6923b..00000000000 --- a/mindspore/python/mindspore/compression/quant/qat.py +++ /dev/null @@ -1,634 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Quantization aware training - -User can use quantization aware to train a model. MindSpore supports quantization aware training, -which models quantization errors in both the forward and backward passes using fake-quantization -operations. Note that the entire computation is carried out in floating point. At the end of quantization -aware training, MindSpore provides conversion functions to convert the trained model into lower precision. - -Note: This is an experimental interface that is subject to change and/or deletion. 
-""" - -from __future__ import absolute_import -import re -import numpy as np -import mindspore.context as context -from mindspore import log as logger -from mindspore import nn, ops -from mindspore._checkparam import Validator, Rel -from mindspore.nn.layer import quant -from mindspore.ops import functional as F -from ..common import QuantDtype -from .quantizer import Quantizer, OptimizeOption -from .quant_utils import compute_kl_threshold - -__all__ = ["QuantizationAwareTraining", "create_quant_config"] - - -def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver), - quant_delay=(0, 0), - quant_dtype=(QuantDtype.INT8, QuantDtype.INT8), - per_channel=(False, False), - symmetric=(False, False), - narrow_range=(False, False), - mode="DEFAULT"): - r""" - Config the observer type of weights and data flow with quant parameters. - - Args: - quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element - applies to weights and the second applies to data flow. Currently, only - :class:`FakeQuantWithMinMaxObserver` supported. - Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver). - quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized - during train and eval. The first element represents weights and the second element represents data flow. - Default: (0, 0). - quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first - element represents weights and the second element represents data flow. - Default: (QuantDtype.INT8, QuantDtype.INT8). - per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` - then base on per channel, otherwise base on per layer. The first element represents weights - and the second element represents data flow, and the second element must be `False` now. - Default: (False, False). - symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then - base on symmetric, otherwise base on asymmetric. The first element represents weights and the second - element represents data flow. Default: (False, False). - narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. - The first element represents weights and the second element represents data flow. - Default: (False, False). - mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported. - Default: "DEFAULT". - - Returns: - QuantConfig, contains the observer type of weight and activation. - - Raises: - ValueError: If the second element of `per_channel` is not `False`. - """ - if per_channel[-1]: - raise ValueError("Arg 'per_channel' second element must be 'False'.") - weight_observer = quant_observer[0].partial_init(quant_delay=quant_delay[0], quant_dtype=quant_dtype[0], - per_channel=per_channel[0], symmetric=symmetric[0], - narrow_range=narrow_range[0], mode=mode) - act_observer = quant_observer[-1].partial_init(quant_delay=quant_delay[-1], quant_dtype=quant_dtype[-1], - per_channel=per_channel[-1], symmetric=symmetric[-1], - narrow_range=narrow_range[-1], mode=mode) - return quant.QuantConfig(weight=weight_observer, activation=act_observer) - - -class _AddFakeQuantInput(nn.Cell): - """ - Add FakeQuant OP at input of the network. Only support one input case. 
- """ - - def __init__(self, network, quant_delay=0): - super(_AddFakeQuantInput, self).__init__(auto_prefix=False) - self.fake_quant_input = quant.FakeQuantWithMinMaxObserver(min_init=-6, max_init=6, - quant_delay=quant_delay, ema=True) - self.fake_quant_input.update_parameters_name('fake_quant_input.') - self.network = network - - def construct(self, data): - data = self.fake_quant_input(data) - output = self.network(data) - return output - - -class _AddFakeQuantAfterSubCell(nn.Cell): - """ - Add FakeQuant OP after of the sub Cell. - """ - - def __init__(self, subcell, **kwargs): - super(_AddFakeQuantAfterSubCell, self).__init__(auto_prefix=False) - self.subcell = subcell - self.mode = "DEFAULT" - self.max_init = 6 - self.min_init = -6 - - if kwargs.get("optimize_option") is not None and OptimizeOption.LEARNED_SCALE in kwargs["optimize_option"]: - self.mode = "LEARNED_SCALE" - self.max_init = 16 - self.min_init = -16 - - self.fake_quant_act = quant.FakeQuantWithMinMaxObserver(min_init=self.min_init, - max_init=self.max_init, - ema=True, - quant_dtype=kwargs.get("quant_dtype"), - quant_delay=kwargs.get("quant_delay"), - per_channel=kwargs.get("per_channel"), - symmetric=kwargs.get("symmetric"), - narrow_range=kwargs.get("narrow_range"), - mode=self.mode) - - def construct(self, *data): - output = self.subcell(*data) - output = self.fake_quant_act(output) - return output - - -class QuantizationAwareTraining(Quantizer): - r""" - Quantizer for quantization aware training. - - Args: - bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True. - freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance. - Default: 1e7. - quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized - during train and eval. The first element represents weights and the second element represents data flow. - Default: (0, 0). - quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first - element represents weights and the second element represents data flow. It is necessary to consider the - precision support of hardware devices in the practical quantization infer scenario. - Default: (QuantDtype.INT8, QuantDtype.INT8). - per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` - then base on per channel, otherwise base on per layer. The first element represents weights and the - second element represents data flow, and the second element must be `False` now. Default: (False, False). - symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then - base on symmetric, otherwise base on asymmetric. The first element represents weights and the second - element represents data flow. Default: (False, False). - narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. - The first element represents weights and the second element represents data flow. - Default: (False, False). - optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently - only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured, - `LEARNED_SCALE` has a higher priority. 
`LEARNED_SCALE` currently only work under some constraints, which - includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators - such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this - scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT. - one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Raises: - TypeError: If the element of `quant_delay` or `freeze_bn` is not int. - TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range` - is not bool. - TypeError: If the element of `quant_dtype` is not `QuantDtype`. - ValueError: If the length of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is - not less than 2. - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0. - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True). - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True). - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0). - - Examples: - >>> from mindspore.compression.quant import QuantizationAwareTraining - >>> from mindspore import nn - >>> class LeNet5(nn.Cell): - ... def __init__(self, num_class=10, channel=1): - ... super(LeNet5, self).__init__() - ... self.type = "fusion" - ... self.num_class = num_class - ... - ... # change `nn.Conv2d` to `nn.Conv2dBnAct` - ... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - ... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - ... # change `nn.Dense` to `nn.DenseBnAct` - ... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - ... self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - ... self.fc3 = nn.DenseBnAct(84, self.num_class) - ... - ... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - ... self.flatten = nn.Flatten() - ... - ... def construct(self, x): - ... x = self.conv1(x) - ... x = self.max_pool2d(x) - ... x = self.conv2(x) - ... x = self.max_pool2d(x) - ... x = self.flatten(x) - ... x = self.fc1(x) - ... x = self.fc2(x) - ... x = self.fc3(x) - ... return x - ... 
- >>> net = LeNet5() - >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False]) - >>> net_qat = quantizer.quantize(net) - """ - __quant_op_name = ["Add", "Sub", "Mul", "RealDiv", "ReduceMean"] - - def __init__(self, - bn_fold=True, - freeze_bn=10000000, - quant_delay=(0, 0), - quant_dtype=(QuantDtype.INT8, QuantDtype.INT8), - per_channel=(False, False), - symmetric=(False, False), - narrow_range=(False, False), - optimize_option=OptimizeOption.QAT, - one_conv_fold=True): - """Init for QuantizationAwareTraining quantizer""" - super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option) - - def convert2list(name, value): - if not isinstance(value, list) and not isinstance(value, tuple): - value = [value] - elif len(value) > 2: - raise ValueError("input `{}` len should less then 2".format(name)) - return value - - quant_delay_list = convert2list("quant delay", quant_delay) - quant_dtype_list = convert2list("quant dtype", quant_dtype) - per_channel_list = convert2list("per channel", per_channel) - symmetric_list = convert2list("symmetric", symmetric) - narrow_range_list = convert2list("narrow range", narrow_range) - - self.weight_qdelay = Validator.check_non_negative_int(quant_delay_list[0], "quant delay") - self.act_qdelay = Validator.check_int(quant_delay_list[-1], 0, Rel.GE, "quant delay") - self.bn_fold = Validator.check_bool(bn_fold, "bn fold") - self.freeze_bn = Validator.check_non_negative_int(freeze_bn, "freeze bn") - self.weight_dtype = Validator.check_isinstance("weights dtype", quant_dtype_list[0], QuantDtype) - self.act_dtype = Validator.check_isinstance("activations dtype", quant_dtype_list[-1], QuantDtype) - self.weight_channel = Validator.check_bool(per_channel_list[0], "per channel") - self.act_channel = Validator.check_bool(per_channel_list[-1], "per channel") - self.weight_symmetric = Validator.check_bool(symmetric_list[0], "symmetric") - self.act_symmetric = Validator.check_bool(symmetric_list[-1], "symmetric") - self.weight_range = Validator.check_bool(narrow_range_list[0], "narrow range") - self.act_range = Validator.check_bool(narrow_range_list[-1], "narrow range") - self.one_conv_fold = Validator.check_bool(one_conv_fold, "one conv fold") - self._convert_method_map = {nn.Conv2dBnAct: self._convert_conv, - nn.DenseBnAct: self._convert_dense} - self.mode = "DEFAULT" - if OptimizeOption.LEARNED_SCALE in self.optimize_option: - self.mode = "LEARNED_SCALE" - if not self.weight_symmetric or not self.act_symmetric: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support " - "symmetric=(True, True) for quant") - if not self.weight_range or not self.act_range: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support narrow_range=(True, True) " - "for quant") - if self.freeze_bn != 0: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support freeze_bn equal to 0, " - "but get freeze_bn={}".format(self.freeze_bn)) - if self.weight_qdelay != 0 or self.act_qdelay != 0: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support quant_delay=(0, 0)") - self.quant_config = create_quant_config(quant_delay=quant_delay_list, - quant_dtype=quant_dtype_list, - per_channel=per_channel_list, - symmetric=symmetric_list, - narrow_range=narrow_range_list, - mode=self.mode) - self.eps = 1e-5 - - @staticmethod - def _convert_op_name(name): - pattern = re.compile(r'([A-Z]{1})') - name_new = re.sub(pattern, r'_\1', name).lower() - if name_new[0] == '_': - name_new = 
name_new[1:] - return name_new - - def quantize(self, network): - """ - Quant API to convert input network to a quantization aware training network. - - Note: - Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`. - - Args: - network (Cell): network to be quantized. - - Returns: - Cell, a quantization aware training network. - - Raises: - KeyError: If the `device_target` set in context is not in `support_device`. - """ - - logger.warning("The compression module is deprecated and may not be supported in later version, please use " - "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.") - support_device = ["Ascend", "GPU"] - if context.get_context('device_target') not in support_device: - raise KeyError("Unsupported {} device target.".format(context.get_context('device_target'))) - - if OptimizeOption.QAT in self.optimize_option or OptimizeOption.LEARNED_SCALE in self.optimize_option: - network.update_cell_prefix() - network = self._convert_subcells2quant(network) - network.update_cell_type("quant") - return network - - def _convert_subcells2quant(self, network): - """ - convert sub cell like `Conv2dBnAct` and `DenseBnAct` to quant cell - """ - cells = network.name_cells() - change = False - for name in cells: - subcell = cells[name] - if subcell == network: - continue - if isinstance(subcell, (nn.Conv2dBnAct, nn.DenseBnAct)): - prefix = subcell.param_prefix - new_subcell = self._convert_method_map[type(subcell)](subcell) - new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - change = True - else: - self._convert_subcells2quant(subcell) - if isinstance(network, nn.SequentialCell) and change: - network.cell_list = list(network.cells()) - - # add FakeQuant OP after OP in white list, but not including those wrapped in the below quantization cell. 
- if isinstance(network, (nn.FakeQuantWithMinMaxObserver, - nn.Conv2dBnFoldQuantOneConv, - nn.Conv2dBnFoldQuant, - nn.Conv2dBnWithoutFoldQuant, - nn.Conv2dQuant, - nn.DenseQuant, - nn.ActQuant, - nn.TensorAddQuant, - nn.MulQuant)): - return network - - add_list = [] - for name in network.__dict__: - if name[0] == '_': - continue - attr = network.__dict__[name] - if isinstance(attr, ops.Primitive) and attr.name in self.__quant_op_name: - add_list.append((name, attr)) - for name, prim_op in add_list: - prefix = name - add_quant = _AddFakeQuantAfterSubCell(prim_op, - quant_dtype=self.act_dtype, - quant_delay=self.act_qdelay, - per_channel=self.act_channel, - symmetric=self.act_symmetric, - narrow_range=self.act_range, - optimize_option=self.optimize_option) - if network.param_prefix: - prefix = '.'.join([network.param_prefix, prefix]) - add_quant.update_parameters_name(prefix + '.') - del network.__dict__[name] - network.insert_child_to_cell(name, add_quant) - return network - - def _convert_conv(self, subcell): - """ - convert Conv2d cell to quant cell - """ - min_init = -6 - max_init = 6 - if self.eps == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - if OptimizeOption.LEARNED_SCALE in self.optimize_option: - subcell_weight_para = subcell.conv.weight.data.asnumpy() - if subcell.has_bn: - scale_factor = (subcell.batchnorm.gamma.data.asnumpy() / - np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype) - self.quant_config = self.quant_config._replace( - weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init)) - - conv_inner = subcell.conv - if subcell.has_bn: - bn_inner = subcell.batchnorm - if self.bn_fold: - if self.one_conv_fold: - conv_inner = quant.Conv2dBnFoldQuantOneConv(conv_inner.in_channels, - conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, - stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, - padding=conv_inner.padding, - dilation=conv_inner.dilation, - group=conv_inner.group, - eps=bn_inner.eps, - momentum=1 - bn_inner.momentum, - has_bias=conv_inner.has_bias, - bias_init=conv_inner.bias_init, - quant_config=self.quant_config, - quant_dtype=self.weight_dtype, - fake=True) - else: - conv_inner = quant.Conv2dBnFoldQuant(conv_inner.in_channels, - conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, - stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, - padding=conv_inner.padding, - dilation=conv_inner.dilation, - group=conv_inner.group, - eps=bn_inner.eps, - momentum=1 - bn_inner.momentum, - has_bias=conv_inner.has_bias, - bias_init=conv_inner.bias_init, - freeze_bn=self.freeze_bn, - quant_config=self.quant_config, - quant_dtype=self.weight_dtype, - fake=True) - # change original network Batch Normalization OP parameters to quant network - conv_inner.gamma = subcell.batchnorm.gamma - conv_inner.beta = subcell.batchnorm.beta - conv_inner.moving_mean = subcell.batchnorm.moving_mean - conv_inner.moving_variance = subcell.batchnorm.moving_variance - else: - conv_inner = quant.Conv2dBnWithoutFoldQuant(conv_inner.in_channels, - conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, - stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, - padding=conv_inner.padding, - dilation=conv_inner.dilation, - group=conv_inner.group, - eps=bn_inner.eps, - momentum=1 - bn_inner.momentum, - has_bias=conv_inner.has_bias, - 
bias_init=conv_inner.bias_init, - quant_config=self.quant_config) - # change original network Batch Normalization OP parameters to quant network - conv_inner.batchnorm.gamma = subcell.batchnorm.gamma - conv_inner.batchnorm.beta = subcell.batchnorm.beta - conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean - conv_inner.batchnorm.moving_variance = subcell.batchnorm.moving_variance - del subcell.batchnorm - subcell.batchnorm = None - subcell.has_bn = False - else: - conv_inner = quant.Conv2dQuant(conv_inner.in_channels, conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, padding=conv_inner.padding, - dilation=conv_inner.dilation, group=conv_inner.group, - has_bias=conv_inner.has_bias, quant_config=self.quant_config, - quant_dtype=self.weight_dtype) - # change original network Conv2D OP parameters to quant network - conv_inner.weight = subcell.conv.weight - if subcell.conv.has_bias: - conv_inner.bias = subcell.conv.bias - subcell.conv = conv_inner - if subcell.has_act and subcell.activation is not None: - subcell.activation = self._convert_activation(subcell.activation) - elif subcell.after_fake: - subcell.has_act = True - subcell.activation = _AddFakeQuantAfterSubCell(F.identity, quant_dtype=self.act_dtype, - quant_delay=self.act_qdelay, per_channel=self.act_channel, - symmetric=self.act_symmetric, narrow_range=self.act_range, - optimize_option=self.optimize_option) - return subcell - - def _convert_dense(self, subcell): - """ - convert dense cell to quant cell - """ - min_init = -6 - max_init = 6 - if self.eps == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - if OptimizeOption.LEARNED_SCALE in self.optimize_option: - subcell_weight_para = subcell.dense.weight.data.asnumpy() - if subcell.has_bn: - scale_factor = (subcell.batchnorm.gamma.data.asnumpy() / - np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype) - self.quant_config = self.quant_config._replace( - weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init)) - - dense_inner = subcell.dense - dense_inner = quant.DenseQuant(dense_inner.in_channels, - dense_inner.out_channels, - has_bias=dense_inner.has_bias, - quant_config=self.quant_config, - quant_dtype=self.weight_dtype) - # change original network Dense OP parameters to quant network - dense_inner.weight = subcell.dense.weight - if subcell.dense.has_bias: - dense_inner.bias = subcell.dense.bias - subcell.dense = dense_inner - if subcell.has_act and subcell.activation is not None: - subcell.activation = self._convert_activation(subcell.activation) - elif subcell.after_fake: - subcell.has_act = True - subcell.activation = _AddFakeQuantAfterSubCell(F.identity, - quant_dtype=self.act_dtype, - quant_delay=self.act_qdelay, - per_channel=self.act_channel, - symmetric=self.act_symmetric, - narrow_range=self.act_range, - optimize_option=self.optimize_option) - return subcell - - def _convert_activation(self, activation): - """ - convert activation cell to quant cell - """ - act_class = activation.__class__ - act_list = [nn.ReLU, nn.ReLU6, nn.Sigmoid] - act_list_with_fake_before = [nn.LeakyReLU, nn.HSigmoid, nn.HSwish] - - if act_class in act_list: - return quant.ActQuant(activation=activation, - quant_config=self.quant_config, - quant_dtype=self.act_dtype) - if act_class in 
act_list_with_fake_before: - return quant.ActQuant(activation=activation, - ema=True, - fake_before=True, - quant_config=self.quant_config, - quant_dtype=self.act_dtype) - raise ValueError("Unsupported activation in auto quant: ", act_class) - - def _kl_init(self, subcell_weight_para, weight_dtype): - """ - Calculate the value of max_init and min_init with compute_kl_threshold. - """ - if self.weight_channel: - max_init = [compute_kl_threshold(weight_para_each, weight_dtype) - for weight_para_each in subcell_weight_para] - min_init = [-x for x in max_init] - else: - max_init = [compute_kl_threshold(subcell_weight_para, weight_dtype)] - min_init = [-x for x in max_init] - return min_init, max_init - - def _set_mixed_bits(self, network, strategy): - r""" - Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE` - optimize_option. - - Args: - network (Cell): Input network. - strategy (list): The quantization strategy for layers that need to be quantified (eg. [[8], [8], - ..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the - convolution layer is supported. - - Returns: - Cell, a network with mixed bit strategy configured. - - Raises: - ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`. - """ - if OptimizeOption.LEARNED_SCALE not in self.optimize_option: - raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` " - "optimize_option.") - - quantizable_idx = [] - pass_cell = None - for i, cell_and_name in enumerate(network.cells_and_names()): - cell = cell_and_name[1] - if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)) and cell is not pass_cell: - quantizable_idx.append(i) - - if len(quantizable_idx) != len(strategy): - raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.") - - quantizable_layer_bit_dict = {idx: bit for idx, bit in zip(quantizable_idx, strategy)} - type_map = { - QuantDtype.INT2.num_bits: QuantDtype.INT2, - QuantDtype.INT3.num_bits: QuantDtype.INT3, - QuantDtype.INT4.num_bits: QuantDtype.INT4, - QuantDtype.INT5.num_bits: QuantDtype.INT5, - QuantDtype.INT6.num_bits: QuantDtype.INT6, - QuantDtype.INT7.num_bits: QuantDtype.INT7, - QuantDtype.INT8.num_bits: QuantDtype.INT8 - } - if self.eps == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - for i, cell_and_name in enumerate(network.cells_and_names()): - cell = cell_and_name[1] - if i not in quantizable_idx: - continue - if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)): - cell.weight_dtype = type_map.get(quantizable_layer_bit_dict[i][0]) - if cell.weight_dtype is None: - raise ValueError("Input strategy is invalid: ", quantizable_layer_bit_dict[i][0]) - if isinstance(cell, nn.Conv2dBnAct): - subcell_weight_para = cell.conv.weight.data.asnumpy() - if hasattr(cell.conv, 'gamma'): - scale_factor = (cell.conv.gamma.data.asnumpy() / - np.sqrt(cell.conv.moving_variance.data.asnumpy() + self.eps)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype) - cell.conv.fake_quant_weight.reset(quant_dtype=cell.weight_dtype, - min_init=min_init, - max_init=max_init) - elif isinstance(cell, nn.DenseBnAct): - subcell_weight_para = cell.dense.weight.data.asnumpy() - if hasattr(cell.dense, 'gamma'): - scale_factor = (cell.dense.gamma.data.asnumpy() / - np.sqrt(cell.dense.moving_variance.data.asnumpy() + self.eps)) - 
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype) - cell.dense.fake_quant_weight.reset(quant_dtype=cell.weight_dtype, - min_init=min_init, - max_init=max_init) - return network diff --git a/mindspore/python/mindspore/compression/quant/quant_utils.py b/mindspore/python/mindspore/compression/quant/quant_utils.py deleted file mode 100644 index cdd1980b182..00000000000 --- a/mindspore/python/mindspore/compression/quant/quant_utils.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Quantization utils. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" - -from __future__ import absolute_import -import numpy as np -from mindspore._checkparam import Validator -from mindspore import log as logger -from ... import nn - -__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"] - - -def cal_quantization_params(input_min, - input_max, - quant_min, - quant_max, - data_type, - symmetric=False): - r""" - Calculate quantization params for scale and zero point. - - Args: - input_min (numpy.ndarray): The dimension of channel or 1. - input_max (numpy.ndarray): The dimension of channel or 1. - quant_min (int): The minimum quantization integer. - quant_max (int): The maximum quantization integer. - data_type (numpy type) : Can be numpy int8, numpy uint8. - symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - - Returns: - scale (numpy.ndarray): quantization param. - zero point (numpy.ndarray): quantization param. 
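# Worked example (NumPy only) of the scale / zero-point formula implemented below,
# for the asymmetric uint8 case: an input range of [-1.0, 2.0] mapped onto [0, 255].
# The numbers are illustrative, not taken from any model in this patch.
import numpy as np
input_min, input_max = np.array([-1.0]), np.array([2.0])
quant_min, quant_max = 0, 255
scale = (input_max - input_min) / (quant_max - quant_min)   # ~0.011765
zero_point = np.floor(quant_min - input_min / scale + 0.5)  # -> 85.0
print(scale, zero_point)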
- """ - if quant_min == quant_max: - raise ValueError("quant_max is equal to quant_min which will lead to divide zero error.") - - input_max = np.maximum(0.0, input_max) - input_min = np.minimum(0.0, input_min) - - if input_min.shape != input_max.shape: - raise ValueError("input min shape should be equal to input max.") - if len(input_min.shape) > 1: - raise ValueError("input min and max shape should be one dim.") - if (input_min > input_max).all(): - raise ValueError("input_min min should be less than input max.") - if (input_max == input_min).all(): - return np.ones(input_min.shape), np.zeros(input_min.shape) - - # calculate scale - if symmetric: - input_max = np.maximum(-input_min, input_max) - input_min = -input_max - scale = (input_max - input_min) / (quant_max - quant_min) - - # calculate zero point - if data_type == np.int8 and symmetric: - zp = np.zeros(input_min.shape) - else: - if scale == 0.0: - raise ValueError("scale can not be 0.") - zp_double = quant_min - input_min / scale - zp = np.floor(zp_double + 0.5) - - return scale, zp - - -def get_quant_min_max(data_type, num_bits=8, narrow_range=False): - """Calculate quantization params for minimum/maximum quantization integer""" - if data_type == np.int8: - quant_min = 0 - 2 ** (num_bits - 1) - quant_max = 2 ** (num_bits - 1) - 1 - elif data_type == np.uint8: - quant_min = 0 - quant_max = 2 ** num_bits - 1 - else: - raise ValueError("Unsupported datatype({})".format(data_type)) - if narrow_range: - quant_min = quant_min + 1 - return quant_min, quant_max - - -def weight2int(data, scale, zero_point, quant_min, quant_max): - r""" - Calculate int8/uint8 weight from fp32. the formula is defined as: - - .. math:: - int8/uint8 = round(float/scale) + offset - - Args: - data (numpy.ndarray): The dimension of channel or 1. Should be NCHW. - scale (numpy.ndarray): The dimension of channel or 1. - zero_point (numpy.ndarray): The dimension of channel or 1. - quant_min (int): The minimum quantization integer. - quant_max (int): The maximum quantization integer. - - Returns: - weight (numpy.ndarray): The dimension of channel or 1. 
- """ - if scale.shape != zero_point.shape: - raise ValueError("`scale` and `zero_point` should have the same shape.") - if scale.shape[0] < 0: - raise ValueError("`scale` and `zero_point` shape should be greater than zero.") - if 0 in scale: - raise ValueError("Zero exist in `scale` which will lead to divide zero error.") - if len(scale.shape) >= 1 and scale.shape[0] > 1: - # for perchannel - if scale.shape[0] == data.shape[0]: - # `Conv2d` or `Dense` op weight - shape_list = [-1] + [1] * len(data.shape[1:]) - scale = scale.reshape(shape_list) - zero_point = zero_point.reshape(shape_list) - elif scale.shape[0] == data.shape[1]: - # `DepthwiseConv2d` op weight - shape_list = [1, -1] + [1] * len(data.shape[2:]) - scale = scale.reshape(shape_list) - zero_point = zero_point.reshape(shape_list) - else: - raise ValueError("Unsupported weight shape({})".format(data.shape)) - - weight_int = np.round((data / scale) + zero_point) - weight_int[weight_int > quant_max] = quant_max - weight_int[weight_int < quant_min] = quant_min - return weight_int - - -def scale_zp_max_min_from_fake_quant_cell(cell, data_type): - """Get calculate quantization params for scale, zero point, max and min from `FakeQuantWithMinMaxObserver`.""" - minq = cell.minq.data.asnumpy() - maxq = cell.maxq.data.asnumpy() - # make sure maxq > 0 and minq <= 0 - if cell.mode == 'LEARNED_SCALE': - maxq = np.abs(maxq) - minq = -np.abs(minq) - quant_min, quant_max = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range) - symmetric = cell.symmetric and not cell.neg_trunc - scale, zp = cal_quantization_params( - minq, maxq, - quant_min, quant_max, data_type, - symmetric=symmetric) - return scale, zp, maxq, minq - - -def fold_batchnorm(weight, cell_quant): - r""" - Fold the batchnorm in `Conv2dBnFoldQuant` to weight. - - Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive. - - Args: - weight (numpy.ndarray): Weight of `cell_quant`. - cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`. - - Returns: - weight (numpy.ndarray): Folded weight. - bias (numpy.ndarray): Folded bias. - """ - variance = cell_quant.moving_variance.data.asnumpy() - mean = cell_quant.moving_mean.data.asnumpy() - gamma = cell_quant.gamma.data.asnumpy() - beta = cell_quant.beta.data.asnumpy() - epsilon = cell_quant.eps - if epsilon == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - sigma = np.sqrt(variance + epsilon) - - if gamma.shape[0] == weight.shape[0]: - # `Conv2d` or `Dense` op weight - shape_list = [-1] + [1] * len(weight.shape[1:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - elif gamma.shape[0] == weight.shape[1]: - # `DepthwiseConv2d` op weight - shape_list = [1, -1] + [1] * len(weight.shape[2:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - else: - raise ValueError("Unsupported weight shape({})".format(weight.shape)) - - weight = weight * _gamma / _sigma - bias = beta - gamma * mean / sigma - return weight, bias - - -def without_fold_batchnorm(weight, cell_quant): - r""" - Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight. - - Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive. - - Args: - weight (numpy.ndarray): Weight of `cell_quant`. - cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`. - - Returns: - weight (numpy.ndarray): whihout folded weight. - bias (numpy.ndarray): without folded bias. 
- """ - variance = cell_quant.batchnorm.moving_variance.data.asnumpy() - mean = cell_quant.batchnorm.moving_mean.data.asnumpy() - gamma = cell_quant.batchnorm.gamma.data.asnumpy() - beta = cell_quant.batchnorm.beta.data.asnumpy() - epsilon = cell_quant.batchnorm.eps - if epsilon == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - sigma = np.sqrt(variance + epsilon) - - if gamma.shape[0] == weight.shape[0]: - # `Conv2d` or `Dense` op weight - shape_list = [-1] + [1] * len(weight.shape[1:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - elif gamma.shape[0] == weight.shape[1]: - # `DepthwiseConv2d` op weight - shape_list = [1, -1] + [1] * len(weight.shape[2:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - else: - raise ValueError("Unsupported weight shape({})".format(weight.shape)) - - weight = weight * _gamma / _sigma - bias = beta - gamma * mean / sigma - return weight, bias - - -def compute_kl_threshold(data, bitwidth): - r""" - Using KL-J Distance to calculate the clip threshold. - - Args: - - **data** (NumpyArray) - Data observed to calculate the threshold for quantization, - - **bitwidth** (QuantDtype) - The datatype of quantization. - Outputs: - Tensor with Shape 1. Threshold to calculate the data. - """ - data_max = np.abs(data).max() - if data_max < 1e-5: - return 1e-5 - hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(0, data_max), density=True) - # For the sake of high efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode, If it exceeds the - # largest size, turn to use the default bins config. - largest_bin_size = 1024 - if hist.shape[0] > largest_bin_size: - hist, bin_edges = np.histogram(np.abs(data), range=(0, data_max), density=True) - sum_ = np.sum(hist) - if sum_ == 0: - hist = 0 - else: - hist = hist / sum_ - cumsum = np.cumsum(hist) - bit_pow_range = pow(2, int(bitwidth.num_bits) - 1) - threshold = [] - scaling_factor = [] - kl = [] - if bit_pow_range + 1 > len(bin_edges) - 1: - th_layer_out = bin_edges[-1] - return float(th_layer_out) - for i in range(bit_pow_range + 1, len(bin_edges), 1): - threshold_tmp = (i + 0.5) * (bin_edges[1] - bin_edges[0]) - threshold = np.concatenate((threshold, [threshold_tmp])) - scaling_factor_tmp = threshold_tmp / (bit_pow_range - 1) - scaling_factor = np.concatenate((scaling_factor, [scaling_factor_tmp])) - # forward interpolation - cumsum_tmp = np.copy(cumsum) - cumsum_tmp[(i - 1):] = 1 - fwd_x = np.linspace(0.0, 1.0, bit_pow_range) - fwd_xp = np.linspace(0.0, 1.0, i) - fwd_fp = cumsum_tmp[:i] - forward_interp = np.interp(fwd_x, fwd_xp, fwd_fp) - # backward interpolation - bwd_x = np.linspace(0.0, 1.0, i) - bwd_xp = np.linspace(0.0, 1.0, bit_pow_range) - bwd_fp = forward_interp - backward_interp = np.interp(bwd_x, bwd_xp, bwd_fp) - cumsum_tmp[:i] = backward_interp - if 0 in cumsum_tmp: - raise ValueError("Zero exist in `cumsum_tmp` which will lead to divide zero error") - kl_tmp = np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)) # Kullback-Leibler-J - kl = np.concatenate((kl, [kl_tmp])) - th_layer_out = threshold[np.argmin(kl)] - threshold = float(th_layer_out) - if threshold < 1e-5: - threshold = 1e-5 - return threshold - - -def query_quant_layers(network): - r""" - Query the network's quantization strategy of each quantized layer and print it to the screen, note that all the - quantization layers are queried before graph compile optimization in the graph mode, thus, some redundant quantized - layers, which not 
exist in practical execution, may appear. - - Args: - network (Cell): input network - - Examples: - >>> from mindspore.compression.quant import QuantizationAwareTraining - >>> from mindspore.compression.quant.quant_utils import query_quant_layers - >>> class LeNet5(nn.Cell): - ... def __init__(self, num_class=10, channel=1): - ... super(LeNet5, self).__init__() - ... self.type = "fusion" - ... self.num_class = num_class - ... - ... # change `nn.Conv2d` to `nn.Conv2dBnAct` - ... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - ... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - ... # change `nn.Dense` to `nn.DenseBnAct` - ... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - ... self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - ... self.fc3 = nn.DenseBnAct(84, self.num_class) - ... - ... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - ... self.flatten = nn.Flatten() - ... - ... def construct(self, x): - ... x = self.conv1(x) - ... x = self.max_pool2d(x) - ... x = self.conv2(x) - ... x = self.max_pool2d(x) - ... x = self.flatten(x) - ... x = self.fc1(x) - ... x = self.fc2(x) - ... x = self.fc3(x) - ... return x - ... - >>> net = LeNet5() - >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False]) - >>> net_qat = quantizer.quantize(net) - >>> query_quant_layers(net_qat) - conv1.conv.fake_quant_weight INT8 - conv1.activation.fake_quant_act INT8 - conv2.conv.fake_quant_weight INT8 - conv2.activation.fake_quant_act INT8 - fc1.dense.fake_quant_weight INT8 - fc1.activation.fake_quant_act INT8 - fc2.dense.fake_quant_weight INT8 - fc2.activation.fake_quant_act INT8 - fc3.dense.fake_quant_weight INT8 - fc3.activation.fake_quant_act INT8 - """ - network = Validator.check_isinstance("network", network, nn.Cell) - tplt = "{0:60}\t{1:10}" - for cell_and_name in network.cells_and_names(): - cell_name = cell_and_name[0] - cell = cell_and_name[1] - if isinstance(cell, nn.FakeQuantWithMinMaxObserver): - logger.info(tplt.format(cell_name, cell.quant_dtype)) - - -def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None): - r""" - Load fp32 model parameters into quantization model. - - Args: - quant_model(Cell): Quantization model. - params_dict(dict): Parameter dict that stores fp32 parameters. - quant_new_params(list): Parameters that exist in quantization network but not in non-quantization - network. Default: None. - - Raises: - TypeError: If `quant_new_params` is not None and is not list. - ValueError: If there are parameters in the `quant_model` that are neither in `params_dict` - nor in `quant_new_params`. - - Examples: - >>> import mindspore as ms - >>> from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net - >>> class LeNet5(nn.Cell): - ... def __init__(self, num_class=10, channel=1): - ... super(LeNet5, self).__init__() - ... self.type = "fusion" - ... self.num_class = num_class - ... - ... # change `nn.Conv2d` to `nn.Conv2dBnAct` - ... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - ... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - ... # change `nn.Dense` to `nn.DenseBnAct` - ... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - ... self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - ... self.fc3 = nn.DenseBnAct(84, self.num_class) - ... - ... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - ... 
self.flatten = nn.Flatten() - ... - ... def construct(self, x): - ... x = self.conv1(x) - ... x = self.max_pool2d(x) - ... x = self.conv2(x) - ... x = self.max_pool2d(x) - ... x = self.flatten(x) - ... x = self.fc1(x) - ... x = self.fc2(x) - ... x = self.fc3(x) - ... return x - ... - >>> net = LeNet5() - >>> ckpt_file_name = "./checkpoint/LeNet5_noquant-1_32.ckpt" - >>> param_dict = ms.load_checkpoint(ckpt_file_name) - >>> load_nonquant_param_into_quant_net(net, param_dict) - """ - if quant_new_params is not None and not isinstance(quant_new_params, list): - raise TypeError("quant_new_params must be list or None.") - iterable_dict = { - 'minq': iter(list(filter(lambda item: item[0].endswith('minq'), params_dict.items()))), - 'maxq': iter(list(filter(lambda item: item[0].endswith('maxq'), params_dict.items()))), - 'quant_max': iter(list(filter(lambda item: item[0].endswith('quant_max'), params_dict.items()))) - } - for param in params_dict.items(): - key_name = param[0].split(".")[-1] - if key_name not in iterable_dict: - iterable_dict[key_name] = iter(list(filter(lambda item, value=key_name: item[0].endswith(value), - params_dict.items()))) - - for name, param in quant_model.parameters_and_names(): - key_name = name.split(".")[-1] - if key_name not in iterable_dict.keys(): - if key_name not in quant_new_params: - raise ValueError(f"Can't find match parameter in ckpt, param name = {name}") - continue - value_param = next(iterable_dict[key_name], None) - if value_param: - param.set_data(value_param[1].data) - logger.info(f'init model param {name} with checkpoint param {value_param[0]}') - - # Perform KL_init when learned scale quantization is executed. - for cell_and_name in quant_model.cells_and_names(): - cell = cell_and_name[1] - if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant, - nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE": - subcell_weight_para = cell.weight.data.asnumpy() - if hasattr(cell, 'gamma'): - scale_factor = (cell.gamma.data.asnumpy() / - np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - - if cell.fake_quant_weight.per_channel: - max_init = [compute_kl_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype) - for weight_para_each in subcell_weight_para] - min_init = [-x for x in max_init] - else: - max_init = [compute_kl_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)] - min_init = [-x for x in max_init] - - cell.fake_quant_weight.reset(quant_dtype=cell.fake_quant_weight.quant_dtype, - min_init=min_init, max_init=max_init) diff --git a/mindspore/python/mindspore/compression/quant/quantizer.py b/mindspore/python/mindspore/compression/quant/quantizer.py deleted file mode 100644 index 571123a8d50..00000000000 --- a/mindspore/python/mindspore/compression/quant/quantizer.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Note: - Base Class of Quantizer. This is interface that is subject to change or deletion. -""" - -from __future__ import absolute_import -from abc import ABC, abstractmethod -from enum import Enum - -from mindspore._checkparam import Validator - -__all__ = ["OptimizeOption"] - - -class OptimizeOption(Enum): - r""" - An enum for the model quantization optimize option, currently only support `QAT` and `LEARNED_SCALE`. - """ - # using quantization aware training - QAT = "QAT" - - # using the learned scale quantization - LEARNED_SCALE = "LEARNED_SCALE" - - def __str__(self): - return str(self.value) - - -class Quantizer(ABC): - """ - Base class of Quantizer. You can implement different kind of quantizer to get different quantization result. - - Notes: - This class is an abstract class. - - Args: - optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options. Default: - OptimizeOption.QAT. - """ - def __init__(self, - optimize_option=OptimizeOption.QAT): - if not isinstance(optimize_option, list) and not isinstance(optimize_option, tuple): - optimize_option = [optimize_option] - for option in optimize_option: - option = Validator.check_isinstance("optimize_option", option, OptimizeOption) - self.optimize_option = optimize_option - - @abstractmethod - def quantize(self, network): - """ - Quant API to convert input network to a quantization aware training network - Args: - network (Cell): network to be quantized. - """ diff --git a/mindspore/python/mindspore/nn/layer/__init__.py b/mindspore/python/mindspore/nn/layer/__init__.py index 255339f6016..4ce3352d2b3 100644 --- a/mindspore/python/mindspore/nn/layer/__init__.py +++ b/mindspore/python/mindspore/nn/layer/__init__.py @@ -20,7 +20,7 @@ The high-level components(Cells) used to construct the neural network. from __future__ import absolute_import from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \ - image, quant, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense + image, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense from mindspore.nn.layer.activation import * from mindspore.nn.layer.normalization import * from mindspore.nn.layer.container import * @@ -32,7 +32,6 @@ from mindspore.nn.layer.basic import * from mindspore.nn.layer.embedding import * from mindspore.nn.layer.pooling import * from mindspore.nn.layer.image import * -from mindspore.nn.layer.quant import * from mindspore.nn.layer.math import * from mindspore.nn.layer.combined import * from mindspore.nn.layer.timedistributed import * @@ -53,7 +52,6 @@ __all__.extend(basic.__all__) __all__.extend(embedding.__all__) __all__.extend(pooling.__all__) __all__.extend(image.__all__) -__all__.extend(quant.__all__) __all__.extend(math.__all__) __all__.extend(combined.__all__) __all__.extend(timedistributed.__all__) diff --git a/mindspore/python/mindspore/nn/layer/quant.py b/mindspore/python/mindspore/nn/layer/quant.py deleted file mode 100644 index 8df53dccf62..00000000000 --- a/mindspore/python/mindspore/nn/layer/quant.py +++ /dev/null @@ -1,1868 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Quantization aware training.""" -from __future__ import absolute_import - -from functools import partial -from collections import namedtuple -import numpy as np - -import mindspore.common.dtype as mstype -from mindspore.ops.primitive import Primitive -from mindspore.ops import operations as P -from mindspore.common.parameter import Parameter -from mindspore.common.initializer import initializer -from mindspore.common.tensor import Tensor -from mindspore._checkparam import Validator, twice -from mindspore.compression.common import QuantDtype -import mindspore.context as context -from mindspore.nn.layer.normalization import BatchNorm2d -from mindspore.nn.layer.activation import get_activation -from mindspore.nn.cell import Cell -from mindspore import nn -from mindspore.ops.operations import _quant_ops as Q -from mindspore.nn.layer.combined import Conv2dBnAct -from mindspore.nn.layer.conv import Conv2d -from mindspore.nn.layer.basic import Dense - -__all__ = [ - 'FakeQuantWithMinMaxObserver', - 'Conv2dBnFoldQuantOneConv', - 'Conv2dBnFoldQuant', - 'Conv2dBnWithoutFoldQuant', - 'Conv2dQuant', - 'DenseQuant', - 'ActQuant', - 'TensorAddQuant', - 'MulQuant', -] - - -class BatchNormFoldCell(Cell): - """ - Batch Normalization folded. - - Args: - momentum (float): Momentum value must be [0, 1]. Default: 0.9. - epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in - float32 else 1e-3. Default: 1e-5. - freeze_bn (int): Delay in steps at which computation switches from regular batch - norm to frozen mean and std. Default: 0. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C, H, W)`. - - **mean** (Tensor) - Tensor of shape :math:`(C,)`. - - **variance** (Tensor) - Tensor of shape :math:`(C,)`. - - **global_step** (Tensor) - Tensor to record current global step. - - Outputs: - Tuple of 4 Tensor, the normalized input and the updated parameters. - - - **batch_mean** (Tensor) - Tensor of shape :math:`(C,)`. - - **batch_std** (Tensor) - Tensor of shape :math:`(C,)`. - - **running_mean** (Tensor) - Tensor of shape :math:`(C,)`. - - **running_std** (Tensor) - Tensor of shape :math:`(C,)`. 
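A rough NumPy sketch of the four statistics this cell is documented to return, assuming a plain exponential-moving-average update of the running values; the actual BatchNormFold/BatchNormFoldD kernels compute this internally and may differ in detail.

import numpy as np

def batchnorm_fold_stats(x, moving_mean, moving_variance, momentum=0.9, epsilon=1e-5):
    # Per-channel statistics of the current NCHW batch.
    batch_mean = x.mean(axis=(0, 2, 3))
    batch_var = x.var(axis=(0, 2, 3))
    batch_std = np.sqrt(batch_var + epsilon)
    # Assumed exponential-moving-average update of the running statistics.
    running_mean = momentum * moving_mean + (1.0 - momentum) * batch_mean
    running_var = momentum * moving_variance + (1.0 - momentum) * batch_var
    running_std = np.sqrt(running_var + epsilon)
    return batch_mean, batch_std, running_mean, running_std

x = np.random.randn(2, 3, 4, 4).astype(np.float32)
print(batchnorm_fold_stats(x, np.zeros(3, np.float32), np.ones(3, np.float32)))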
- """ - - def __init__(self, momentum=0.9, epsilon=1e-5, freeze_bn=0): - """Initialize batch norm fold layer""" - super(BatchNormFoldCell, self).__init__() - self.epsilon = epsilon - self.is_gpu = context.get_context('device_target') == "GPU" - if self.is_gpu: - self.bn_train = Q.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) - self.bn_infer = Q.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn) - else: - self.bn_reduce = P.BNTrainingReduce() - self.bn_update = Q.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) - - def construct(self, x, mean, variance, global_step): - if self.is_gpu: - if self.training: - batch_mean, batch_std, running_mean, running_std = self.bn_train(x, mean, variance, global_step) - else: - batch_mean, batch_std, running_mean, running_std = self.bn_infer(x, mean, variance, global_step) - else: - if self.training: - x_sum, x_square_sum = self.bn_reduce(x) - _, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated = \ - self.bn_update(x, x_sum, x_square_sum, mean, variance) - P.Assign()(mean, mean_updated) - P.Assign()(variance, variance_updated) - else: - batch_mean = P.ZerosLike()(variance) - batch_std = P.OnesLike()(variance) - running_mean = P.Add()(mean, 0.) - running_std = P.Sqrt()(P.Add()(variance, self.epsilon)) - return batch_mean, batch_std, running_mean, running_std - - -def _partial_init(cls_or_self, **kwargs): - """ - Wrapper that allows creation of class factories. - - This can be useful when there is a need to create classes with the same - constructor arguments, but different instances. - - Examples: - >>> class Foo: - ... def __init__(self, a, b, answer): - ... pass - >>> Foo.partial_init = classmethod(_partial_init) - >>> foo_builder = Foo.partial_init(a=3, b=4).partial_init(answer=42) - >>> foo_instance1 = foo_builder() - >>> foo_instance2 = foo_builder() - >>> result = (id(foo_instance1) == id(foo_instance2)) - >>> print(result) - False - """ - - class _PartialWrapper: - r""" - class of wrapper that allows creation of class factories. - """ - - partial_init = _partial_init - - def __init__(self, p): - self.p = p - - def __call__(self, *args, **keywords): - return self.p(*args, **keywords) - - def __repr__(self): - return self.p.__repr__() - - r = _PartialWrapper(partial(cls_or_self, **kwargs)) - return r - - -class _Observer(Cell): - """ - Base class of Observer. Observer is used to calculate the statistics of specific layer. - - Notes: - This class is an abstract class. - - Args: - quant_dtype (QuantDtype): The type of FakeQuant data. - """ - - partial_init = classmethod(_partial_init) - - def __init__(self, quant_dtype): - """Initialize _Observer.""" - super(_Observer, self).__init__() - self.quant_dtype = quant_dtype - - def extend_repr(self): - s = f"quant_dtype={self.quant_dtype}" - return s - - def construct(self): - pass - - -class UniformQuantObserver(_Observer): - """ - The base class of Uniform Quantization Observer. - - Args: - quant_dtype (QuantDtype): The type of FakeQuant data. Default: QuantDtype.INT8. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. - num_channels (int): declarate the min and max channel size, Default: 1. - - Returns: - Tensor. 
- """ - - min_max_map = { - QuantDtype.INT2: (-2, 1), - QuantDtype.INT3: (-4, 3), - QuantDtype.INT4: (-8, 7), - QuantDtype.INT5: (-16, 15), - QuantDtype.INT6: (-32, 31), - QuantDtype.INT7: (-64, 63), - QuantDtype.INT8: (-128, 127), - - QuantDtype.UINT2: (0, 3), - QuantDtype.UINT3: (0, 7), - QuantDtype.UINT4: (0, 15), - QuantDtype.UINT5: (0, 31), - QuantDtype.UINT6: (0, 63), - QuantDtype.UINT7: (0, 127), - QuantDtype.UINT8: (0, 255) - } - - def __init__(self, quant_dtype=QuantDtype.INT8, per_channel=False, symmetric=False, narrow_range=False, - num_channels=1): - """Initialize UniformQuantObserver.""" - super(UniformQuantObserver, self).__init__(quant_dtype) - self.per_channel = per_channel - self.symmetric = symmetric - self.narrow_range = narrow_range - self.num_channels = num_channels - - -class FakeQuantWithMinMaxObserver(UniformQuantObserver): - r""" - Quantization aware operation which provides the fake quantization observer function on data with min and max. - - The detail of the quantization mode `DEFAULT` is described as below: - - The running min/max :math:`x_{min}` and :math:`x_{max}` are computed as: - - .. math:: - - \begin{array}{ll} \\ - x_{min} = - \begin{cases} - \min(\min(X), 0) - & \text{ if } ema = \text{False} \\ - \min((1 - c) \min(X) + \text{c } x_{min}, 0) - & \text{ if } \text{otherwise} - \end{cases}\\ - x_{max} = - \begin{cases} - \max(\max(X), 0) - & \text{ if } ema = \text{False} \\ - \max((1 - c) \max(X) + \text{c } x_{max}, 0) - & \text{ if } \text{otherwise} - \end{cases} - \end{array} - - where X is the input tensor, and :math:`c` is the `ema_decay`. - - The scale and zero point zp is computed as: - - .. math:: - - \begin{array}{ll} \\ - scale = - \begin{cases} - \frac{x_{max} - x_{min}}{Q_{max} - Q_{min}} - & \text{ if } symmetric = \text{False} \\ - \frac{2\max(x_{max}, \left | x_{min} \right |) }{Q_{max} - Q_{min}} - & \text{ if } \text{otherwise} - \end{cases}\\ - zp\_min = Q_{min} - \frac{x_{min}}{scale} \\ - zp = \left \lfloor \min(Q_{max}, \max(Q_{min}, zp\_min)) + 0.5 \right \rfloor - \end{array} - - where :math:`Q_{max}` and :math:`Q_{min}` is decided by quant_dtype, for example, if quant_dtype=INT8, - then :math:`Q_{max} = 127` and :math:`Q_{min} = -128`. - - The fake quant output is computed as: - - .. math:: - - \begin{array}{ll} \\ - u_{min} = (Q_{min} - zp) * scale \\ - u_{max} = (Q_{max} - zp) * scale \\ - u_X = \left \lfloor \frac{\min(u_{max}, \max(u_{min}, X)) - u_{min}}{scale} - + 0.5 \right \rfloor \\ - output = u_X * scale + u_{min} - \end{array} - - The detail of the quantization mode `LEARNED_SCALE` is described as below: - - The fake quant output is computed as: - - .. math:: - - \bar{X}=\left\{\begin{matrix} - clip\left ( \frac{X}{maxq},0,1\right ) \qquad \quad if\quad neg\_trunc\\ - clip\left ( \frac{X}{maxq},-1,1\right )\qquad \ if\quad otherwise - \end{matrix}\right. \\ - - output=\frac{floor\left ( \bar{X}\ast Q_{max}+0.5 \right ) \ast scale }{Q_{max}} - - where X is the input tensor. - where :math:`Q_{max}` (quant_max) is decided by quant_dtype and neg_trunc, for example, if quant_dtype=INT8 - and neg_trunc works, :math:`Q_{max} = 256` , otherwise :math:`Q_{max} = 127`. - - The maxq is updated by training, and its gradient is calculated as follows: - - .. 
math:: - - \frac{\partial \ output}{\partial \ maxq} = \left\{\begin{matrix} - -\frac{X}{maxq}+\left \lfloor \frac{X}{maxq} \right \rceil \qquad if\quad bound_{lower}< \frac{X}{maxq}< 1\\ - -1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\le bound_{lower}\\ - 1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\ge 1 \qquad \quad - \end{matrix}\right. \\ - - bound_{lower}= - \left\{\begin{matrix} - 0\qquad \quad if\quad neg\_trunc\\ - -1\qquad if\quad otherwise - \end{matrix}\right. - - Then minq is computed as: - - .. math:: - - minq=\left\{\begin{matrix} - 0 \qquad \qquad \quad if\quad neg\_trunc\\ - -maxq\qquad if\quad otherwise - \end{matrix}\right. - - When exporting, the scale and zero point zp is computed as: - - .. math:: - - scale=\frac{maxq}{quant\_max} ,\quad zp=0 \\ - - zp is equal to 0 consistently, due to the LEARNED_SCALE`s symmetric nature. - - Args: - min_init (int, float, list): The initialized min value. Default: -6. - max_init (int, float, list): The initialized max value. Default: 6. - ema (bool): The exponential Moving Average algorithm updates min and max. Default: False. - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - channel_axis (int): Quantization by channel axis. Default: 1. - num_channels (int): declarate the min and max channel size, Default: 1. - quant_dtype (QuantDtype): The datatype of quantization, supporting 4 and 8bits. Default: QuantDtype.INT8. - symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. - quant_delay (int): Quantization delay parameters according to the global step. Default: 0. - neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False. - mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported. - Default: ("DEFAULT") - Inputs: - - **x** (Tensor) - The input of FakeQuantWithMinMaxObserver. The input dimension is preferably 2D or 4D. - - Outputs: - Tensor, with the same type and shape as the `x`. - - Raises: - TypeError: If `min_init` or `max_init` is not int, float or list. - TypeError: If `quant_delay` is not an int. - ValueError: If `quant_delay` is less than 0. - ValueError: If `min_init` is not less than `max_init`. - ValueError: If `mode` is neither `DEFAULT` nor `LEARNED_SCALE`. - ValueError: If `mode` is `LEARNED_SCALE` and `symmetric` is not `True`. - ValueError: If `mode` is `LEARNED_SCALE`, and `narrow_range` is not `True` unless when `neg_trunc` is `True`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore import Tensor - >>> fake_quant = nn.FakeQuantWithMinMaxObserver() - >>> x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> result = fake_quant(x) - >>> print(result) - [[ 0.9882355 1.9764705 0.9882355] - [-1.9764705 0. 
-0.9882355]] - """ - - def __init__(self, - min_init=-6, - max_init=6, - ema=False, - ema_decay=0.999, - per_channel=False, - channel_axis=1, - num_channels=1, - quant_dtype=QuantDtype.INT8, - symmetric=False, - narrow_range=False, - quant_delay=0, - neg_trunc=False, - mode="DEFAULT"): - """Initialize FakeQuantWithMinMaxObserver""" - super(FakeQuantWithMinMaxObserver, self).__init__(quant_dtype=quant_dtype, per_channel=per_channel, - symmetric=symmetric, narrow_range=narrow_range, - num_channels=num_channels) - Validator.check_value_type("min_init", min_init, [int, float, list], type(self).__name__) - Validator.check_value_type("max_init", max_init, [int, float, list], type(self).__name__) - Validator.check_non_negative_int(quant_delay, 'quant_delay', self.cls_name) - self.min_init = min_init - self.max_init = max_init - self.quant_dtype = quant_dtype - self.num_bits = quant_dtype.num_bits - self.ema = ema - self.ema_decay = ema_decay - self.per_channel = per_channel - self.num_channels = num_channels - self.channel_axis = channel_axis - self.quant_delay = quant_delay - self.symmetric = symmetric - self.narrow_range = narrow_range - self.neg_trunc = neg_trunc - self.mode = mode - self.is_ascend = context.get_context('device_target') == "Ascend" - self.Neg = P.Neg() - - min_array = self._get_init_array(self.min_init) - max_array = self._get_init_array(self.max_init) - if not np.greater(max_array, min_array).all(): - raise ValueError(f"For '{self.cls_name}', the 'max_init' must be greater than 'min_init', " - f"but got 'max_init': {max_init}, 'min_init': {min_init}.") - if self.mode == "DEFAULT": - self._default_init(min_array, max_array) - elif self.mode == "LEARNED_SCALE": - self._learned_scale_init(min_array, max_array) - else: - raise ValueError(f"For '{self.cls_name}', only `DEFAULT` and `LEARNED_SCALE` mode are valid, but got " - f"'mode': {self.mode}.") - - def reset(self, quant_dtype=QuantDtype.INT8, min_init=-6, max_init=6): - r""" - Reset the quant max parameter (eg. 256) and the initial value of the minq parameter and maxq parameter, - this function is currently only valid for `LEARNED_SCALE` mode. - - Args: - quant_dtype (QuantDtype): The datatype of quantization, supporting 4 and 8bits. Default: QuantDtype.INT8. - min_init (int, float, list): The initialized min value. Default: -6. - max_init (int, float, list): The initialized max value. Default: 6. - """ - if self.mode == "LEARNED_SCALE": - self.quant_dtype = quant_dtype - self.num_bits = quant_dtype.num_bits - self._calculate_quant_max() - if self.neg_trunc: - min_init = 0 - - self.min_init = min_init - self.max_init = max_init - min_array = self._get_init_array(self.min_init) - max_array = self._get_init_array(self.max_init) - if not np.greater(max_array, min_array).all(): - raise ValueError(f"For '{self.cls_name}', the 'max_init' must be greater than 'min_init', " - f"but got 'max_init': {max_init}, 'min_init': {min_init}.") - - self.minq.set_data(Tensor(min_array)) - self.maxq.set_data(Tensor(max_array)) - self.quant_max.set_data(Tensor(np.array([self._quant_max]).astype(np.float32))) - else: - raise ValueError(f"For '{self.cls_name}', only `LEARNED_SCALE` mode is valid, but got 'mode': {self.mode}.") - - def _default_init(self, min_array, max_array): - """ - Initialization of `DEFAULT`(QAT) mode. 
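The two quantization modes documented above can be condensed into a short NumPy sketch: the DEFAULT path derives scale and zero point from the observed min/max, while the LEARNED_SCALE path clips against the trainable maxq. This follows the docstring formulas only; the FakeQuantPerLayer/FakeQuantPerChannel and FakeLearnedScaleQuant* operators used by _default_init and _learned_scale_init may differ in rounding, EMA tracking and quant_delay handling.

import numpy as np

def int_range(num_bits):
    # Signed integer range used by the DEFAULT mode, e.g. 8 bits -> (-128, 127).
    return -(1 << (num_bits - 1)), (1 << (num_bits - 1)) - 1

def fake_quant_default(x, num_bits=8, symmetric=False):
    # Per-layer sketch of the DEFAULT-mode formulas: derive scale and zero
    # point from the observed min/max, then clip, round and dequantize.
    q_min, q_max = int_range(num_bits)
    x_min = min(float(x.min()), 0.0)
    x_max = max(float(x.max()), 0.0)
    if symmetric:
        scale = 2.0 * max(x_max, abs(x_min)) / (q_max - q_min)
    else:
        scale = (x_max - x_min) / (q_max - q_min)
    zp = np.floor(np.clip(q_min - x_min / scale, q_min, q_max) + 0.5)
    u_min = (q_min - zp) * scale
    u_max = (q_max - zp) * scale
    u_x = np.floor((np.clip(x, u_min, u_max) - u_min) / scale + 0.5)
    return u_x * scale + u_min

def fake_quant_learned_scale(x, maxq, num_bits=8, neg_trunc=False):
    # Sketch of the LEARNED_SCALE forward pass: clip x / maxq to [-1, 1]
    # (or [0, 1] with neg_trunc), round on the quant_max grid, rescale back.
    quant_max = (1 << num_bits) - 1 if neg_trunc else (1 << (num_bits - 1)) - 1
    lower = 0.0 if neg_trunc else -1.0
    x_bar = np.clip(x / maxq, lower, 1.0)
    return np.floor(x_bar * quant_max + 0.5) * maxq / quant_max

x = np.array([[1, 2, 1], [-2, 0, -1]], np.float32)
print(fake_quant_default(x))
print(fake_quant_learned_scale(x, maxq=6.0))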
- """ - # init tensor min and max for fake quantized operation - self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) - self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False) - - # init fake quant relative op - if self.per_channel: - quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis) - ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis) - else: - quant_fun = Q.FakeQuantPerLayer - ema_fun = Q.MinMaxUpdatePerLayer - - self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay) - if self.is_ascend: - self.fake_quant_train = quant_fun(num_bits=self.quant_dtype.num_bits, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - quant_delay=self.quant_delay) - self.fake_quant_infer = self.fake_quant_train - else: - quant_fun = partial(quant_fun, - ema=self.ema, - ema_decay=self.ema_decay, - num_bits=self.quant_dtype.num_bits, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - quant_delay=self.quant_delay) - self.fake_quant_train = quant_fun(training=True) - self.fake_quant_infer = quant_fun(training=False) - - def _learned_scale_init(self, min_array, max_array): - """ - Initialization of `LEARNED_SCALE` mode. - """ - if not self.symmetric: - raise ValueError(f"For '{self.cls_name}', the 'LEARNED_SCALE' mode only support 'symmetric' quant, " - f"but got 'symmetric': {self.symmetric}. Please set 'symmetric' to True.") - if self.neg_trunc: - min_array = self._get_init_array(0) - if self.narrow_range: - raise ValueError(f"For '{self.cls_name}', the 'LEARNED_SCALE' mode only support the combination of " - f"'neg_trunc=True and narrow_range=False' config scenario, but got 'narrow_range': " - f"{self.narrow_range}.") - elif not self.narrow_range: - raise ValueError(f"For '{self.cls_name}', the 'LEARNED_SCALE' mode only support 'narrow_range=True' " - f"config, except for 'neg_trunc=True' scenario. But got 'narrow_range': " - f"{self.narrow_range}.") - - self._calculate_quant_max() - - self.minq = Parameter(Tensor(min_array), name='minq') - self.maxq = Parameter(Tensor(max_array), name='maxq') - self.quant_max = Parameter(Tensor(np.array([self._quant_max]).astype(np.float32)), - name="quant_max", requires_grad=False) - - # init fake quant relative op - if self.per_channel: - quant_fun = partial(Q.FakeLearnedScaleQuantPerChannel, channel_axis=self.channel_axis) - else: - quant_fun = Q.FakeLearnedScaleQuantPerLayer - - quant_fun = partial(quant_fun, - quant_delay=self.quant_delay, - neg_trunc=self.neg_trunc) - self.fake_quant_train = quant_fun(training=True) - self.fake_quant_infer = quant_fun(training=False) - - def _get_init_array(self, init_date): - """ - Convert the initial value to array. 
- """ - if isinstance(init_date, list) and self.per_channel and len(init_date) != self.num_channels: - raise ValueError(f"For '{self.cls_name}', the length of 'min_init/max_init' list must be equal to " - f"'num_channels' for perchannel quant scenario, but got 'min_init/max_init': {init_date} " - f"and num_channels: {self.num_channels}.") - if isinstance(init_date, list) and not self.per_channel and len(init_date) != 1: - raise ValueError(f"For '{self.cls_name}', the length of the 'min_init/max_init' list must be 1 for " - f"perlayer quant scenario, but got {len(init_date)}.") - - if isinstance(init_date, list): - min_max_array = np.array(init_date).astype(np.float32) - elif self.per_channel and not isinstance(init_date, list): - min_max_array = np.array([init_date] * self.num_channels).astype(np.float32) - else: - min_max_array = np.array([init_date]).astype(np.float32) - return min_max_array - - def _calculate_quant_max(self): - """ - The quantization range is calculated according to num_bits. - """ - if not self.neg_trunc: - self._quant_max = (1 << (self.num_bits - 1)) - 1 - else: - self._quant_max = (1 << self.num_bits) - 1 - - def extend_repr(self): - """Display instance object as string.""" - s = 'quant_dtype={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ - 'quant_delay={}, min_init={}, max_init={}'.format(self.quant_dtype, self.symmetric, self.narrow_range, - self.ema, self.ema_decay, self.per_channel, - self.channel_axis, self.num_channels, self.quant_delay, - self.min_init, self.max_init) - return s - - def construct(self, x): - if self.mode == "LEARNED_SCALE": - if self.training: - out = self.fake_quant_train(x, self.maxq, self.quant_max) - if not self.neg_trunc: - self.minq = self.Neg(self.maxq) - else: - out = self.fake_quant_infer(x, self.maxq, self.quant_max) - else: - if self.training: - min_up, max_up = self.ema_update(x, self.minq, self.maxq) - self.minq = min_up - self.maxq = max_up - out = self.fake_quant_train(x, self.minq, self.maxq) - else: - out = self.fake_quant_infer(x, self.minq, self.maxq) - return out - - -QuantConfig = namedtuple("QuantConfig", ['weight', 'activation']) - -quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - activation=FakeQuantWithMinMaxObserver.partial_init()) - - -class Conv2dBnFoldQuantOneConv(Cell): - r""" - 2D convolution which use the convolution layer statistics once to calculate Batch Normalization - operation folded construct. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`FakeQuantWithMinMaxObserver`. - - .. math:: - w_{q}=quant(\frac{w}{\sqrt{var_{G}+\epsilon}}*\gamma ) - - b=\frac{-\mu _{G} }{\sqrt{var_{G}+\epsilon }}*\gamma +\beta - - y=w_{q}\times x+b - - where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of - subclass of `FakeQuantWithMinMaxObserver`, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - `mu _{G}` and `var_{G}` represent the global mean and variance respectively. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. 
The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - eps (float): Parameters for Batch Normalization. Default: 1e-5. - momentum (float): Parameters for Batch Normalization op. Default: 0.997. - has_bias (bool): Specifies whether the layer uses a bias vector, which is temporarily invalid. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - convolution kernel. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - bias vector. Default: 'zeros'. - beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - beta vector. Default: 'zeros'. - gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - gamma vector. Default: 'ones'. - mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - mean vector. Default: 'zeros'. - var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - variance vector. Default: 'ones'. - fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. - TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` or `fake` is not a bool. - TypeError: If `data_format` is not a string. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_bnfold = nn.Conv2dBnFoldQuantOneConv(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... 
weight_init="ones", quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_bnfold(x) - >>> print(result) - [[[[5.9296875 13.8359375] - [11.859375 17.78125]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - eps=1e-5, - momentum=0.997, - has_bias=False, - weight_init='normal', - bias_init='zeros', - beta_init='zeros', - gamma_init='ones', - mean_init='zeros', - var_init='ones', - fake=True, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize Conv2dBnFoldQuant layer""" - super(Conv2dBnFoldQuantOneConv, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values " - f"in ('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), but got " - f"{type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - self.eps = eps - self.momentum = 1 - momentum - self.has_bias = has_bias - self.fake = Validator.check_bool(fake, "fake", self.cls_name) - self.quant_config = quant_config - data_format = 'NCHW' - self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name) - self._target = context.get_context("device_target") - self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE - self.is_ge_backend = False - if context.get_context("enable_ge"): - self.is_ge_backend = True - self.enable_default_train = self.is_graph_mode and \ - (self.is_ge_backend or self._target == "Ascend") - - # initialize convolution op and Parameter - self.conv = P.Conv2D(out_channel=out_channels, - kernel_size=self.kernel_size, - pad_mode=pad_mode, - pad=padding, - stride=self.stride, - dilation=self.dilation, - group=group) - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - channel_axis = 0 - self.channel_axis = channel_axis - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - self.bias_add = P.BiasAdd() - self.bias = None - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - - # initialize BatchNorm Parameter - self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma') - self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta') - self.moving_mean = 
Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False) - self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance', - requires_grad=False) - - # initialize fake ops - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - self.freeze_bn = False - self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps, - momentum=self.momentum, data_format=self.format) - - self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format) - self.sub_mean = P.Sub() - self.sub_var = P.Sub() - self.mul_mean = P.Mul() - self.mul_var = P.Mul() - self.assign_sub_mean = P.AssignSub() - self.assign_sub_var = P.AssignSub() - self.reshape = P.Reshape() - - @classmethod - def from_float(cls, convbn: Conv2dBnAct, quant_config: QuantConfig): - """ - A class method to create `Conv2dBnFoldQuantOneConv` from a `Conv2dBnAct` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> conv_bn_op = nn.Conv2dBnAct(ic, oc, kernel_size) - >>> # when apply QAT on `conv_bn_op`, QAT need to create a quant Conv2dBnAct whose weight is fake-quanted, - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> conv_bn_quant = nn.Conv2dBnFoldQuantOneConv.from_float(conv_bn_op, quant_config) - """ - - kwargs = {'in_channels': convbn.conv.in_channels, - 'out_channels': convbn.conv.out_channels, - 'kernel_size': convbn.conv.kernel_size, - 'stride': convbn.conv.stride, - 'pad_mode': convbn.conv.pad_mode, - 'padding': convbn.conv.padding, - 'dilation': convbn.conv.dilation, - 'group': convbn.conv.group, - 'has_bias': convbn.conv.has_bias, - 'bias_init': convbn.conv.bias_init, - 'weight_init': convbn.conv.weight_init, - 'quant_config': quant_config, - 'fake': True, - } - if hasattr(convbn, 'batchnorm'): - kwargs['eps'] = convbn.batchnorm.eps - kwargs['momentum'] = convbn.batchnorm.momentum - kwargs['beta_init'] = convbn.batchnorm.beta_init - kwargs['gamma_init'] = convbn.batchnorm.gamma_init - kwargs['mean_init'] = convbn.batchnorm.moving_mean_init - kwargs['var_init'] = convbn.batchnorm.moving_var_init - return cls(**kwargs) - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, momentum={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, self.fake, - self.momentum) - return s - - def construct(self, x): - running_std = P.Sqrt()(P.Add()(self.moving_variance, self.eps)) - scale_factor = self.gamma / running_std - if self.channel_axis: - scale_factor = self.reshape(scale_factor, (1, -1, 1, 1)) - else: - scale_factor = self.reshape(scale_factor, (-1, 1, 1, 1)) - weight = self.weight * scale_factor - if self.fake: - weight = self.fake_quant_weight(weight) - conv = self.conv(x, weight) - - if self.freeze_bn: - return conv + self.reshape((self.beta - self.gamma * self.moving_mean / running_std), (1, -1, 1, 1)) - scale_factor = self.reshape(scale_factor, (1, -1, 1, 1)) - if self.enable_default_train: - scale_factor = P.Reciprocal()(scale_factor) - conv_orig = conv * scale_factor - else: - conv_orig = conv / scale_factor - if self.training: - return self.bn_train(conv_orig, - self.gamma, - self.beta, - self.moving_mean, - 
self.moving_variance)[0] - - return self.bn_infer(conv_orig, - self.gamma, - self.beta, - self.moving_mean, - self.moving_variance)[0] - - -class Conv2dBnFoldQuant(Cell): - r""" - 2D convolution with Batch Normalization operation folded construct. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`FakeQuantWithMinMaxObserver`. - - .. math:: - y = x\times w+ b - - w_{q}=quant(\frac{w}{\sqrt{Var[y]+\epsilon}}*\gamma ) - - y_{out}= w_{q}\times x+\frac{b-E[y]}{\sqrt{Var[y]+\epsilon}}*\gamma +\beta - - where :math:`quant` is the continuous execution of quant and dequant. Two convolution - and Batch Normalization operation are used here, the purpose of the first convolution and Batch Normalization - is to count the mean `E[y]` and variance `Var[y]` of current batch output for quantization. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - eps (float): Parameters for Batch Normalization. Default: 1e-5. - momentum (float): Parameters for Batch Normalization op. Default: 0.997. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - convolution kernel. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - bias vector. Default: 'zeros'. - beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - beta vector. Default: 'zeros'. - gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - gamma vector. Default: 'ones'. - mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - mean vector. Default: 'zeros'. - var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - variance vector. Default: 'ones'. - fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step. - Default: 100000. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 
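The fold performed by these Conv2dBnFold* cells (the w_q and b formulas above) amounts to absorbing the BatchNorm scale into the convolution weight and turning the shift into a bias. A minimal NumPy sketch of that inference-time fold, assuming NCHW weights of shape (out, in, kh, kw); the training path re-expands the scale before calling the BatchNorm op.

import numpy as np

def fold_bn_into_conv(weight, gamma, beta, moving_mean, moving_variance, eps=1e-5):
    # Absorb the BatchNorm scale into the convolution weight and turn the
    # BatchNorm shift into an additive bias, as in the docstring formulas.
    running_std = np.sqrt(moving_variance + eps)
    folded_weight = weight * (gamma / running_std).reshape(-1, 1, 1, 1)
    folded_bias = beta - gamma * moving_mean / running_std
    return folded_weight, folded_bias

weight = np.random.randn(8, 3, 3, 3).astype(np.float32)
gamma = np.ones(8, np.float32)
beta = np.zeros(8, np.float32)
mean = np.zeros(8, np.float32)
var = np.ones(8, np.float32)
w_f, b_f = fold_bn_into_conv(weight, gamma, beta, mean, var)
print(w_f.shape, b_f.shape)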
- - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. - TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` or `fake` is not a bool. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_bnfold = nn.Conv2dBnFoldQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... weight_init="ones", quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_bnfold(x) - >>> print(result) - [[[[5.9296875 13.8359375] - [11.859375 17.78125]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - eps=1e-5, - momentum=0.997, - has_bias=False, - weight_init='normal', - bias_init='zeros', - beta_init='zeros', - gamma_init='ones', - mean_init='zeros', - var_init='ones', - fake=True, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8, - freeze_bn=100000): - """Initialize Conv2dBnFoldQuant layer""" - super(Conv2dBnFoldQuant, self).__init__() - if context.get_context('device_target') == "CPU": - raise ValueError(f"For '{self.cls_name}', only the 'Ascend' and 'GPU' platforms" - f" are supported, but got {context.get_context('device_target')}.") - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values in " - f"('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), " - f"but got {type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - self.eps = eps - self.momentum = momentum - self.has_bias = has_bias - self.freeze_bn = freeze_bn - self.fake = Validator.check_bool(fake, "fake", self.cls_name) - self.quant_config = quant_config - self.quant_dtype = quant_dtype - self.is_gpu = context.get_context('device_target') == "GPU" - - # initialize convolution op and Parameter - self.conv = P.Conv2D(out_channel=out_channels, - kernel_size=self.kernel_size, - pad_mode=pad_mode, - pad=padding, - stride=self.stride, - 
dilation=self.dilation, - group=group) - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - channel_axis = 0 - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - self.bias_add = P.BiasAdd() - self.bias = None - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - - # initialize BatchNorm Parameter - self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma') - self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta') - self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False) - self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance', - requires_grad=False) - - # initialize fake ops - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn) - self.correct_mul = Q.CorrectionMul(channel_axis) - if context.get_context('device_target') == "Ascend": - self.batchnorm_fold2_train = Q.BatchNormFold2D(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = Q.BatchNormFold2D(freeze_bn=0) - elif context.get_context('device_target') == "GPU": - self.batchnorm_fold2_train = Q.BatchNormFold2(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = Q.BatchNormFold2(freeze_bn=0) - self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False) - self.one = Tensor(1, mstype.int32) - self.assignadd = P.AssignAdd() - - @classmethod - def from_float(cls, convbn: Conv2dBnAct, quant_config: QuantConfig, extra_args: dict): - """ - A class method to create `Conv2dBnFoldQuantOneConv` from a `Conv2dBnAct` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> conv_bn_op = nn.Conv2dBnAct(ic, oc, kernel_size) - >>> # when apply QAT on `conv_bn_op`, QAT need to create a quant Conv2dBnAct whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... 
activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> extra_args = {"freeze_bn": 100000} - >>> conv_bn_quant = nn.Conv2dBnFoldQuant.from_float(conv_bn_op, quant_config, extra_args) - """ - - kwargs = {'in_channels': convbn.conv.in_channels, - 'out_channels': convbn.conv.out_channels, - 'kernel_size': convbn.conv.kernel_size, - 'stride': convbn.conv.stride, - 'pad_mode': convbn.conv.pad_mode, - 'padding': convbn.conv.padding, - 'dilation': convbn.conv.dilation, - 'group': convbn.conv.group, - 'has_bias': convbn.conv.has_bias, - 'bias_init': convbn.conv.bias_init, - 'weight_init': convbn.conv.weight_init, - 'quant_config': quant_config, - 'fake': True, - } - if hasattr(convbn, 'batchnorm'): - kwargs['eps'] = convbn.batchnorm.eps - kwargs['momentum'] = convbn.batchnorm.momentum - kwargs['beta_init'] = convbn.batchnorm.beta_init - kwargs['gamma_init'] = convbn.batchnorm.gamma_init - kwargs['mean_init'] = convbn.batchnorm.moving_mean_init - kwargs['var_init'] = convbn.batchnorm.moving_var_init - kwargs = {**kwargs, **extra_args} - return cls(**kwargs) - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, freeze_bn={}, momentum={}'.format(self.in_channels, self.out_channels, self.kernel_size, - self.stride, self.pad_mode, self.padding, self.dilation, - self.group, self.fake, self.freeze_bn, self.momentum) - return s - - def construct(self, x): - out_conv = self.conv(x, self.weight) - if self.has_bias: - out_conv = self.bias_add(out_conv, self.bias) - # BN fold1 - batch_mean, batch_std, running_mean, running_std = self.batchnorm_fold(out_conv, - self.moving_mean, - self.moving_variance, - self.step) - # fake weight - weight = self.correct_mul(self.weight, self.gamma, running_std) - if self.fake: - weight = self.fake_quant_weight(weight) - out = self.conv(x, weight) - if self.has_bias: - out = self.bias_add(out, self.bias) - # BN fold2 - if self.is_gpu: - if self.training: - out = self.batchnorm_fold2_train(out, self.beta, self.gamma, - batch_std, batch_mean, running_std, running_mean, self.step) - self.assignadd(self.step, self.one) - else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, - batch_std, batch_mean, running_std, running_mean, self.step) - else: - if self.training: - out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std) - self.assignadd(self.step, self.one) - else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, running_std, running_mean, running_std) - return out - - -class Conv2dBnWithoutFoldQuant(Cell): - r""" - 2D convolution and batchnorm without fold with fake quantized construct. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - .. math:: - y =x\times quant(w)+ b - - y_{bn} =\frac{y-E[y] }{\sqrt{Var[y]+ \epsilon } } *\gamma + \beta - - where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of - class of `FakeQuantWithMinMaxObserver`, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. 
- kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_ channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - eps (float): Parameters for Batch Normalization. Default: 1e-5. - momentum (float): Parameters for Batch Normalization op. Default: 0.997. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. - Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. - TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` is not a bool. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_no_bnfold = nn.Conv2dBnWithoutFoldQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... 
weight_init='ones', quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_no_bnfold(x) - >>> print(result) - [[[[5.929658 13.835868] - [11.859316 17.78116]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - eps=1e-5, - momentum=0.997, - weight_init='normal', - bias_init='zeros', - quant_config=quant_config_default): - """Initialize Conv2dBnWithoutFoldQuant.""" - super(Conv2dBnWithoutFoldQuant, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.has_bias = has_bias - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values in " - f"('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), " - f"but got {type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - self.bias_add = P.BiasAdd() - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - else: - self.bias = None - # initialize convolution op and Parameter - self.conv = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group) - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - channel_axis = 0 - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - self.batchnorm = BatchNorm2d(out_channels, eps=eps, momentum=momentum) - - @classmethod - def from_float(cls, convbn: Conv2dBnAct, quant_config: QuantConfig): - """ - A class method to create `Conv2dBnFoldQuantOneConv` from a `Conv2dBnAct` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> conv_bn_op = nn.Conv2dBnAct(ic, oc, kernel_size) - >>> # when apply QAT on `conv_bn_op`, QAT need to create a quant Conv2dBnAct whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... 
activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> conv_bn_quant = nn.Conv2dBnFoldQuant.from_float(conv_bn_op, quant_config) - """ - - kwargs = {'in_channels': convbn.conv.in_channels, - 'out_channels': convbn.conv.out_channels, - 'kernel_size': convbn.conv.kernel_size, - 'stride': convbn.conv.stride, - 'pad_mode': convbn.conv.pad_mode, - 'padding': convbn.conv.padding, - 'dilation': convbn.conv.dilation, - 'group': convbn.conv.group, - 'has_bias': convbn.conv.has_bias, - 'bias_init': convbn.conv.bias_init, - 'weight_init': convbn.conv.weight_init, - 'quant_config': quant_config, - } - if hasattr(convbn, 'batchnorm'): - kwargs['eps'] = convbn.batchnorm.eps - kwargs['momentum'] = convbn.batchnorm.momentum - kwargs = {**kwargs} - return cls(**kwargs) - - def construct(self, x): - weight = self.fake_quant_weight(self.weight) - out = self.conv(x, weight) - if self.has_bias: - out = self.bias_add(out, self.bias) - out = self.batchnorm(out) - return out - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'has_bias={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, - self.padding, self.dilation, self.group, self.has_bias) - return s - - -class Conv2dQuant(Cell): - r""" - 2D convolution with fake quantized operation layer. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_ channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. - Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - The input dimension is preferably 2D or 4D. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. 
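For the plain Conv2dQuant and Conv2dBnWithoutFoldQuant layers, only the weight is fake-quantized before the convolution. Below is a simplified symmetric per-channel sketch of that step, standing in for the quant_config.weight observer these layers attach to their weights; the real FakeQuantWithMinMaxObserver additionally tracks min/max (optionally with EMA) and honors quant_delay.

import numpy as np

def fake_quant_weight_per_channel(weight, num_bits=8, channel_axis=0):
    # Simplified symmetric per-channel weight fake quantization: one scale per
    # output channel, derived from the channel's absolute maximum.
    q_max = (1 << (num_bits - 1)) - 1
    axes = tuple(i for i in range(weight.ndim) if i != channel_axis)
    max_abs = np.max(np.abs(weight), axis=axes, keepdims=True)
    scale = np.maximum(max_abs, 1e-12) / q_max
    return np.clip(np.round(weight / scale), -q_max - 1, q_max) * scale

weight = np.random.randn(4, 3, 3, 3).astype(np.float32)
print(fake_quant_weight_per_channel(weight).shape)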
- TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` is not a bool. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_quant = nn.Conv2dQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... weight_init='ones', quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_quant(x) - >>> print(result) - [[[[5.9296875 13.8359375] - [11.859375 17.78125]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - weight_init='normal', - bias_init='zeros', - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize Conv2dQuant.""" - super(Conv2dQuant, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.has_bias = has_bias - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values " - f"in ('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), " - f"but got {type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - - self.bias_add = P.BiasAdd() - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - else: - self.bias = None - - self.conv = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group) - channel_axis = 0 - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - - @classmethod - def from_float(cls, conv: Conv2d, quant_config: QuantConfig): - """ - A class method to create `Conv2dQuant` from a `Conv2d` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> 
conv_op = nn.Conv2d(ic, oc, kernel_size) - >>> # when apply QAT on `conv_op`, QAT need to create a quant conv2d whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> conv_quant = nn.Conv2dQuant.from_float(conv_op, quant_config) - """ - conv_quant = cls( - conv.in_channels, - conv.out_channels, - kernel_size=conv.kernel_size, - stride=conv.stride, - pad_mode=conv.pad_mode, - padding=conv.padding, - dilation=conv.dilation, - group=conv.group, - has_bias=conv.has_bias, - bias_init=conv.bias_init, - weight_init=conv.weight_init, - quant_config=quant_config) - return conv_quant - - def construct(self, x): - weight = self.fake_quant_weight(self.weight) - out = self.conv(x, weight) - if self.has_bias: - return self.bias_add(out, self.bias) - return out - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'has_bias={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, - self.padding, self.dilation, self.group, self.has_bias) - return s - - -class DenseQuant(Cell): - r""" - The fully connected layer with fake quantized operation. - - This part is a more detailed overview of Dense operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - in_channels (int): The dimension of the input space. - out_channels (int): The dimension of the output space. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype - is same as `x`. The values of str refer to the function `initializer`. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is - same as `x`. The values of str refer to the function `initializer`. Default: 'zeros'. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. - activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer, - eg. 'relu'. Default: None. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - The input dimension is preferably 2D or 4D. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Raises: - TypeError: If `in_channels`, `out_channels` is not an int. - TypeError: If `has_bias` is not a bool. - TypeError: If `activation` is not str, Cell and Primitive. - ValueError: If `in_channels` or `out_channels` is less than 1. - ValueError: If the dims of `weight_init` is not equal to 2 or the first element of `weight_init` is not equal - to `out_channels` or the second element of `weight_init` is not equal to `in_channels`. 
- ValueError: If the dims of `bias_init` is not equal to 1 or the element of `bias_init` is not equal - to `out_channels`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> dense_quant = nn.DenseQuant(2, 1, weight_init='ones', quant_config=qconfig) - >>> x = Tensor(np.array([[1, 5], [3, 4]]), mindspore.float32) - >>> result = dense_quant(x) - >>> print(result) - [[5.929413] - [6.9176483]] - """ - - def __init__(self, - in_channels, - out_channels, - weight_init='normal', - bias_init='zeros', - has_bias=True, - activation=None, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize DenseQuant.""" - super(DenseQuant, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.has_bias = Validator.check_bool(has_bias, "has_bias", self.cls_name) - - if isinstance(weight_init, Tensor): - if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \ - weight_init.shape[1] != in_channels: - raise ValueError(f"For '{self.cls_name}', weight init shape error. The ndim of 'weight_init' should " - f"be equal to 2, and the first dim must be equal to 'out_channels', and the " - f"second dim must be equal to 'in_channels'. But got 'weight_init': {weight_init}, " - f"'out_channels': {out_channels}, 'in_channels': {in_channels}.") - - self.weight = Parameter(initializer( - weight_init, [out_channels, in_channels]), name="weight") - - if self.has_bias: - if isinstance(bias_init, Tensor): - if bias_init.ndim != 1 or bias_init.shape[0] != out_channels: - raise ValueError(f"For '{self.cls_name}', bias init shape error. The ndim of 'bias_init' should " - f"be equal to 1, and the first dim must be equal to 'out_channels'. But got " - f"'bias_init': {bias_init}, 'out_channels': {out_channels}.") - - self.bias = Parameter(initializer( - bias_init, [out_channels]), name="bias") - - self.matmul = P.MatMul(transpose_b=True) - self.bias_add = P.BiasAdd() - - self.activation = get_activation(activation) if isinstance(activation, str) else activation - if activation is not None and not isinstance(self.activation, (Cell, Primitive)): - raise TypeError(f"For '{self.cls_name}', the 'activation' must be str or Cell or Primitive, " - f"but got {activation}.") - - self.activation_flag = self.activation is not None - self.fake_quant_weight = quant_config.weight(channel_axis=0, - num_channels=out_channels) - - @classmethod - def from_float(cls, dense: Dense, quant_config: QuantConfig): - """ - A class method to create `DenseQuant` from a `Dense` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> dense_op = nn.Dense(ic, oc) - >>> # when apply QAT on `dense_op`, QAT need to create a quant dense whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> dense_quant = nn.DenseQuant.from_float(dense_op, quant_config) - """ - dense_quant = cls( - dense.in_channels, - dense.out_channels, - dense.weight, - dense.bias, - dense.has_bias, - dense.activation, - quant_config=quant_config) - return dense_quant - - def construct(self, x): - """Use operators to construct the Dense layer. - - Args: - x (Tensor): Input tensor. 
- """ - output = self.fake_quant_weight(self.weight) - output = self.matmul(x, output) - if self.has_bias: - output = self.bias_add(output, self.bias) - if self.activation_flag: - return self.activation(output) - return output - - def extend_repr(self): - """A pretty print for Dense layer.""" - s = 'in_channels={}, out_channels={}, weight={}, has_bias={}'.format( - self.in_channels, self.out_channels, self.weight, self.has_bias) - if self.has_bias: - s += ', bias={}'.format(self.bias) - if self.activation_flag: - s += ', activation={}'.format(self.activation) - return s - - -class _QuantActivation(Cell): - r""" - Base class for quantization aware training activation function. Adds fake quantized operation - after activation operation. - """ - - def get_origin(self): - raise NotImplementedError - - -class ActQuant(_QuantActivation): - r""" - Quantization aware training activation function. - - Add the fake quantized operation to the end of activation operation, by which the output of activation - operation will be truncated. For more details about Quantization, please refer to the implementation - of subclass of `FakeQuantWithMinMaxObserver`, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - activation (Cell): Activation cell. - ema (bool): The exponential Moving Average algorithm updates min and max. Default: False. - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - fake_before (bool): Whether add fake quantized operation before activation. Default: False. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - The input of ActQuant. The input dimension is preferably 2D or 4D. - - Outputs: - Tensor, with the same type and shape as the `x`. - - Raises: - TypeError: If `activation` is not an instance of Cell. - TypeError: If `fake_before` is not a bool. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> act_quant = nn.ActQuant(nn.ReLU(), quant_config=qconfig) - >>> x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32) - >>> result = act_quant(x) - >>> print(result) - [[0.9882355 1.9764705 0. ] - [0. 0. 0. 
]] - """ - - def __init__(self, - activation, - ema=False, - ema_decay=0.999, - fake_before=False, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize ActQuant.""" - super(ActQuant, self).__init__() - act_class = activation.__class__ - act_list = [nn.ReLU, nn.ReLU6] - self.act = Validator.check_isinstance("activation", activation, Cell) - self.fake_before = Validator.check_bool(fake_before, "fake_before", self.cls_name) - if self.fake_before: - self.fake_quant_act_before = quant_config.activation(min_init=-6, - max_init=6, - ema=ema, - ema_decay=ema_decay, - quant_dtype=quant_dtype) - self.neg_trunc = False - self.narrow_range = False - preset_dict = quant_config.activation.p.keywords - if 'mode' in preset_dict and preset_dict['mode'] == "LEARNED_SCALE" and act_class in act_list: - self.neg_trunc = True - elif 'narrow_range' in preset_dict: - self.narrow_range = preset_dict['narrow_range'] - - self.fake_quant_act = quant_config.activation(min_init=-6, - max_init=6, - ema=ema, - ema_decay=ema_decay, - quant_dtype=quant_dtype, - neg_trunc=self.neg_trunc, - narrow_range=self.narrow_range) - - def construct(self, x): - if self.fake_before: - x = self.fake_quant_act_before(x) - x = self.act(x) - x = self.fake_quant_act(x) - return x - - def get_origin(self): - return self.act - - -class TensorAddQuant(Cell): - r""" - Adds fake quantized operation after TensorAdd operation. - - This part is a more detailed overview of TensorAdd operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x1** (Tensor) - The first tensor of TensorAddQuant. The input dimension is preferably 2D or 4D. - - **x2** (Tensor) - The second tensor of TensorAddQuant. Has the same shape with `x1`. - - Outputs: - Tensor, with the same type and shape as the `x1`. - - Raises: - TypeError: If `ema_decay` is not a float. - ValueError: If the shape of `x2` is different with `x1`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> add_quant = nn.TensorAddQuant(quant_config=qconfig) - >>> x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> x2 = Tensor(np.ones((2, 3)), mindspore.float32) - >>> output = add_quant(x1, x2) - >>> print(output) - [[ 1.9764705 3.011765 1.9764705] - [-0.9882355 0.9882355 0. 
]] - """ - - def __init__(self, - ema_decay=0.999, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize TensorAddQuant.""" - super(TensorAddQuant, self).__init__() - self.fake_quant_act = quant_config.activation(min_init=-6, - max_init=6, - ema=True, - ema_decay=ema_decay, - quant_dtype=quant_dtype) - self.add = P.Add() - - def construct(self, x1, x2): - x = self.add(x1, x2) - x = self.fake_quant_act(x) - return x - - -class MulQuant(Cell): - r""" - Adds fake quantized operation after `Mul` operation. - - This part is a more detailed overview of `Mul` operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x1** (Tensor) - The first tensor of MulQuant. The input dimension is preferably 2D or 4D. - - **x2** (Tensor) - The second tensor of MulQuant. Has the same shape with `x1`. - - Outputs: - Tensor, with the same type and shape as the `x1`. - - Raises: - TypeError: If `ema_decay` is not a float. - ValueError: If the shape of `x2` is different with `x1`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> mul_quant = nn.MulQuant(quant_config=qconfig) - >>> x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> x2 = Tensor(np.ones((2, 3)) * 2, mindspore.float32) - >>> output = mul_quant(x1, x2) - >>> print(output) - [[ 1.9764705 4.0000005 1.9764705] - [-4. 0. 
-1.9764705]] - """ - - def __init__(self, - ema_decay=0.999, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize MulQuant.""" - super(MulQuant, self).__init__() - self.fake_quant_act = quant_config.activation(min_init=-6, - max_init=6, - ema=True, - ema_decay=ema_decay, - quant_dtype=quant_dtype) - self.mul = P.Mul() - - def construct(self, x1, x2): - x = self.mul(x1, x2) - x = self.fake_quant_act(x) - return x diff --git a/mindspore/python/mindspore/train/serialization.py b/mindspore/python/mindspore/train/serialization.py index 48581c1d382..59c2d286359 100644 --- a/mindspore/python/mindspore/train/serialization.py +++ b/mindspore/python/mindspore/train/serialization.py @@ -25,7 +25,6 @@ import stat import threading from threading import Thread, Lock from collections import defaultdict, OrderedDict -from functools import wraps from io import BytesIO import math @@ -52,7 +51,6 @@ from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore.common._utils import is_shape_unknown from mindspore.communication.management import get_rank, get_group_size -from mindspore.compression.export import quant_export from mindspore.experimental import MapParameter from mindspore.parallel._cell_wrapper import get_allgather_cell from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index @@ -1123,12 +1121,6 @@ def export(net, *inputs, file_name, file_format, **kwargs): kwargs (dict): Configuration options dictionary. - - quant_mode (str): If the network is a quantization aware training network, the quant_mode should - be set to "QUANT", else the quant_mode should be set to "NONQUANT". - - mean (float): The mean of input data after preprocessing, used for quantizing the first layer of network. - Default: 127.5. - - std_dev (float): The variance of input data after preprocessing, - used for quantizing the first layer of the network. Default: 127.5. - enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32. - enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set. 
@@ -1192,7 +1184,6 @@ def export(net, *inputs, file_name, file_format, **kwargs): inputs = tuple(inputs_col) file_name = os.path.realpath(file_name) - net = _quant_export(net, *inputs, file_format=file_format, **kwargs) if 'enc_key' in kwargs.keys(): kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs) _export(net, file_name, file_format, *inputs, **kwargs) @@ -1560,62 +1551,6 @@ def _save_dataset_to_mindir(model, dataset): model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False -def quant_mode_manage(func): - """Inherit the quant_mode in old version.""" - - @wraps(func) - def wrapper(network, *inputs, file_format, **kwargs): - if 'quant_mode' not in kwargs: - return network - quant_mode = kwargs.get('quant_mode') - if not isinstance(quant_mode, str): - raise TypeError("For 'export', the type of 'quant_mode' should be string, " - "but got {}.".format(type(quant_mode))) - if quant_mode in ('AUTO', 'MANUAL'): - kwargs['quant_mode'] = 'QUANT' - return func(network, *inputs, file_format=file_format, **kwargs) - - return wrapper - - -@quant_mode_manage -def _quant_export(network, *inputs, file_format, **kwargs): - """Exports MindSpore quantization predict model to deploy with AIR and MINDIR.""" - supported_device = ["Ascend", "GPU"] - supported_formats = ['AIR', 'MINDIR'] - quant_mode_formats = ['QUANT', 'NONQUANT'] - - quant_mode = kwargs['quant_mode'] - if quant_mode not in quant_mode_formats: - raise KeyError(f"For 'export', the argument 'quant_mode' must be one of {quant_mode_formats}, " - f"but got {quant_mode}.") - if quant_mode == 'NONQUANT': - return network - quant_net = copy.deepcopy(network) - quant_net._create_time = int(time.time() * 1e9) - - mean = 127.5 if kwargs.get('mean', None) is None else kwargs.get('mean') - std_dev = 127.5 if kwargs.get('std_dev', None) is None else kwargs.get('std_dev') - mean = Validator.check_value_type("mean", mean, (int, float)) - std_dev = Validator.check_value_type("std_dev", std_dev, (int, float)) - - if context.get_context('device_target') not in supported_device: - raise KeyError(f"For 'export', quant export only support {supported_device} device target now, " - f"but got {context.get_context('device_target')}") - - if file_format not in supported_formats: - raise ValueError(f"For 'export', quant export only support 'file_format' {supported_formats}, " - f"but got {file_format}.") - - quant_net.set_train(False) - if file_format == "MINDIR": - exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs, is_mindir=True) - else: - exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs) - deploy_net = exporter.run() - return deploy_net - - def parse_print(print_file_name): """ Parse data file generated by mindspore.ops.Print. diff --git a/tests/st/quantization/lenet_quant/config.py b/tests/st/quantization/lenet_quant/config.py deleted file mode 100644 index 1106edfa6da..00000000000 --- a/tests/st/quantization/lenet_quant/config.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -network config setting, will be used in test_lenet_quant.py -""" - -from easydict import EasyDict as edict - -quant_cfg = edict({ - 'num_classes': 10, - 'lr': 0.01, - 'momentum': 0.9, - 'epoch_size': 10, - 'batch_size': 64, - 'buffer_size': 1000, - 'image_height': 32, - 'image_width': 32, - 'keep_checkpoint_max': 10, -}) diff --git a/tests/st/quantization/lenet_quant/dataset.py b/tests/st/quantization/lenet_quant/dataset.py deleted file mode 100644 index 164825686b2..00000000000 --- a/tests/st/quantization/lenet_quant/dataset.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Produce the dataset -""" - -import mindspore.dataset as ds -import mindspore.dataset.vision as CV -import mindspore.dataset.transforms as C -from mindspore.dataset.vision import Inter -from mindspore.common import dtype as mstype - - -def create_dataset(data_path, batch_size=32, repeat_size=1, - num_parallel_workers=1): - """ - create dataset for train or test - """ - # define dataset - mnist_ds = ds.MnistDataset(data_path) - - resize_height, resize_width = 32, 32 - rescale = 1.0 / 255.0 - shift = 0.0 - rescale_nml = 1 / 0.3081 - shift_nml = -1 * 0.1307 / 0.3081 - - # define map operations - resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode - rescale_nml_op = CV.Rescale(rescale_nml, shift_nml) - rescale_op = CV.Rescale(rescale, shift) - hwc2chw_op = CV.HWC2CHW() - type_cast_op = C.TypeCast(mstype.int32) - - # apply map operations on images - mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) - - # apply DatasetOps - buffer_size = 10000 - mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script - mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) - mnist_ds = mnist_ds.repeat(repeat_size) - - return mnist_ds diff --git 
a/tests/st/quantization/lenet_quant/lenet_fusion.py b/tests/st/quantization/lenet_quant/lenet_fusion.py deleted file mode 100644 index 88b35935027..00000000000 --- a/tests/st/quantization/lenet_quant/lenet_fusion.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""LeNet.""" -import mindspore.nn as nn - - -class LeNet5(nn.Cell): - """ - Lenet network - - Args: - num_class (int): Num classes. Default: 10. - - Returns: - Tensor, output tensor - Examples: - >>> LeNet(num_class=10) - - """ - - def __init__(self, num_class=10, channel=1): - super(LeNet5, self).__init__() - self.type = "fusion" - self.num_class = num_class - - # change `nn.Conv2d` to `nn.Conv2dBnAct` - self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - # change `nn.Dense` to `nn.DenseBnAct` - self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - self.fc3 = nn.DenseBnAct(84, self.num_class) - - self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - self.flatten = nn.Flatten() - - def construct(self, x): - x = self.conv1(x) - x = self.max_pool2d(x) - x = self.conv2(x) - x = self.max_pool2d(x) - x = self.flatten(x) - x = self.fc1(x) - x = self.fc2(x) - x = self.fc3(x) - return x diff --git a/tests/st/quantization/lenet_quant/test_lenet_quant.py b/tests/st/quantization/lenet_quant/test_lenet_quant.py deleted file mode 100644 index f52d04c5375..00000000000 --- a/tests/st/quantization/lenet_quant/test_lenet_quant.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -train and infer lenet quantization network -""" - -import os -import pytest -from mindspore import context -from mindspore import Tensor -from mindspore.common import dtype as mstype -import mindspore.nn as nn -from mindspore.train.metrics import Accuracy -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor -from mindspore import load_checkpoint, load_param_into_net, export -from mindspore.train import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore.compression.quant.quantizer import OptimizeOption -from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net -from dataset import create_dataset -from config import quant_cfg -from lenet_fusion import LeNet5 as LeNet5Fusion -import numpy as np - -data_path = "/home/workspace/mindspore_dataset/mnist" -lenet_ckpt_path = "/home/workspace/mindspore_dataset/checkpoint/lenet/ckpt_lenet_noquant-10_1875.ckpt" - -def train_lenet_quant(optim_option="QAT"): - cfg = quant_cfg - ckpt_path = lenet_ckpt_path - ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1) - step_size = ds_train.get_dataset_size() - - # define fusion network - network = LeNet5Fusion(cfg.num_classes) - - # load quantization aware network checkpoint - param_dict = load_checkpoint(ckpt_path) - load_nonquant_param_into_quant_net(network, param_dict) - - # convert fusion network to quantization aware network - if optim_option == "LEARNED_SCALE": - quant_optim_otions = OptimizeOption.LEARNED_SCALE - quantizer = QuantizationAwareTraining(bn_fold=False, - per_channel=[True, False], - symmetric=[True, True], - narrow_range=[True, True], - freeze_bn=0, - quant_delay=0, - one_conv_fold=True, - optimize_option=quant_optim_otions) - else: - quantizer = QuantizationAwareTraining(quant_delay=900, - bn_fold=False, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # define network loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") - # define network optimization - net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) - - # call back and monitor - config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, - keep_checkpoint_max=cfg.keep_checkpoint_max) - ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant"+optim_option, config=config_ckpt) - - # define model - model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) - - print("============== Starting Training ==============") - model.train(cfg['epoch_size'], ds_train, callbacks=[ckpt_callback, LossMonitor()], - dataset_sink_mode=True) - print("============== End Training ==============") - - -def eval_quant(optim_option="QAT"): - cfg = quant_cfg - ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1) - ckpt_path = './ckpt_lenet_quant'+optim_option+'-10_937.ckpt' - # define fusion network - network = LeNet5Fusion(cfg.num_classes) - # convert fusion network to quantization aware network - if optim_option == "LEARNED_SCALE": - quant_optim_otions = OptimizeOption.LEARNED_SCALE - quantizer = QuantizationAwareTraining(bn_fold=False, - per_channel=[True, False], - symmetric=[True, True], - narrow_range=[True, True], - freeze_bn=0, - quant_delay=0, - one_conv_fold=True, - optimize_option=quant_optim_otions) - else: - quantizer = QuantizationAwareTraining(quant_delay=0, - bn_fold=False, - 
freeze_bn=10000, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # define loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") - # define network optimization - net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) - - # call back and monitor - model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) - - # load quantization aware network checkpoint - param_dict = load_checkpoint(ckpt_path) - not_load_param = load_param_into_net(network, param_dict) - if not_load_param: - raise ValueError("Load param into net fail!") - - print("============== Starting Testing ==============") - acc = model.eval(ds_eval, dataset_sink_mode=True) - print("============== {} ==============".format(acc)) - assert acc['Accuracy'] > 0.98 - - -def export_lenet(optim_option="QAT", file_format="MINDIR"): - cfg = quant_cfg - # define fusion network - network = LeNet5Fusion(cfg.num_classes) - # convert fusion network to quantization aware network - if optim_option == "LEARNED_SCALE": - quant_optim_otions = OptimizeOption.LEARNED_SCALE - quantizer = QuantizationAwareTraining(bn_fold=False, - per_channel=[True, False], - symmetric=[True, True], - narrow_range=[True, True], - freeze_bn=0, - quant_delay=0, - one_conv_fold=True, - optimize_option=quant_optim_otions) - else: - quantizer = QuantizationAwareTraining(quant_delay=0, - bn_fold=False, - freeze_bn=10000, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # export network - inputs = Tensor(np.ones([1, 1, cfg.image_height, cfg.image_width]), mstype.float32) - export(network, inputs, file_name="lenet_quant", file_format=file_format, quant_mode='AUTO') - - -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_lenet_quant(): - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - train_lenet_quant() - eval_quant() - export_lenet() - train_lenet_quant(optim_option="LEARNED_SCALE") - eval_quant(optim_option="LEARNED_SCALE") - export_lenet(optim_option="LEARNED_SCALE") - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_lenet_quant_ascend(): - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - train_lenet_quant(optim_option="LEARNED_SCALE") - eval_quant(optim_option="LEARNED_SCALE") - export_lenet(optim_option="LEARNED_SCALE", file_format="AIR") - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_lenet_quant_ascend_pynative(): - """ - test_lenet_quant_ascend_pynative - Features: test_lenet_quant_ascend_pynative - Description: test_lenet_quant_ascend_pynative pynative mode - Expectation: None - """ - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") - train_lenet_quant(optim_option="QAT") diff --git a/tests/st/quantization/mobilenetv2_quant/dataset.py b/tests/st/quantization/mobilenetv2_quant/dataset.py deleted file mode 100644 index ee2802fa86a..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/dataset.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" create train dataset. """ - -from functools import partial -import mindspore.dataset as ds -import mindspore.common.dtype as mstype -import mindspore.dataset.vision as C -import mindspore.dataset.transforms as C2 - - -def create_dataset(dataset_path, config, repeat_num=1, batch_size=32): - """ - create a train dataset - - Args: - dataset_path(string): the path of dataset. - config(EasyDict):the basic config for training - repeat_num(int): the repeat times of dataset. Default: 1. - batch_size(int): the batch size of dataset. Default: 32. - - Returns: - dataset - """ - - load_func = partial(ds.Cifar10Dataset, dataset_path) - cifar_ds = load_func(num_parallel_workers=8, shuffle=False) - - resize_height = config.image_height - resize_width = config.image_width - rescale = 1.0 / 255.0 - shift = 0.0 - - # define map operations - # interpolation default BILINEAR - resize_op = C.Resize((resize_height, resize_width)) - rescale_op = C.Rescale(rescale, shift) - normalize_op = C.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) - changeswap_op = C.HWC2CHW() - type_cast_op = C2.TypeCast(mstype.int32) - - c_trans = [resize_op, rescale_op, normalize_op, changeswap_op] - - # apply map operations on images - cifar_ds = cifar_ds.map(input_columns="label", operations=type_cast_op) - cifar_ds = cifar_ds.map(input_columns="image", operations=c_trans) - - # apply batch operations - cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True) - - # apply dataset repeat operation - cifar_ds = cifar_ds.repeat(repeat_num) - - return cifar_ds diff --git a/tests/st/quantization/mobilenetv2_quant/lr_generator.py b/tests/st/quantization/mobilenetv2_quant/lr_generator.py deleted file mode 100644 index bc6ff8106e2..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/lr_generator.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""learning rate generator""" - -import math -import numpy as np - - -def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): - """ - generate learning rate array - - Args: - global_step(int): total steps of the training - lr_init(float): init learning rate - lr_end(float): end learning rate - lr_max(float): max learning rate - warmup_epochs(int): number of warmup epochs - total_epochs(int): total epoch of training - steps_per_epoch(int): steps of one epoch - - Returns: - np.array, learning rate array - """ - lr_each_step = [] - total_steps = steps_per_epoch * total_epochs - warmup_steps = steps_per_epoch * warmup_epochs - for i in range(total_steps): - if i < warmup_steps: - lr = lr_init + (lr_max - lr_init) * i / warmup_steps - else: - lr = lr_end + \ - (lr_max - lr_end) * \ - (1. + math.cos(math.pi * (i - warmup_steps) / - (total_steps - warmup_steps))) / 2. - if lr < 0.0: - lr = 0.0 - lr_each_step.append(lr) - - current_step = global_step - lr_each_step = np.array(lr_each_step).astype(np.float32) - learning_rate = lr_each_step[current_step:] - - return learning_rate diff --git a/tests/st/quantization/mobilenetv2_quant/mobilenetV2.py b/tests/st/quantization/mobilenetv2_quant/mobilenetV2.py deleted file mode 100644 index bd176021b29..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/mobilenetV2.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""MobileNetV2 Quant model define""" - -import numpy as np - -import mindspore.nn as nn -from mindspore.ops import operations as P -from mindspore import Tensor - -__all__ = ['mobilenetV2'] - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class GlobalAvgPooling(nn.Cell): - """ - Global avg pooling definition. - - Args: - - Returns: - Tensor, output tensor. - - Examples: - >>> GlobalAvgPooling() - """ - - def __init__(self): - super(GlobalAvgPooling, self).__init__() - self.mean = P.ReduceMean(keep_dims=False) - - def construct(self, x): - x = self.mean(x, (2, 3)) - return x - - -class ConvBNReLU(nn.Cell): - """ - Convolution/Depthwise fused with Batchnorm and ReLU block definition. - - Args: - in_planes (int): Input channel. - out_planes (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size for the first convolutional layer. Default: 1. - groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. - - Returns: - Tensor, output tensor. 
- - Examples: - >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) - """ - - def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - super(ConvBNReLU, self).__init__() - padding = (kernel_size - 1) // 2 - self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size, - stride=stride, - pad_mode='pad', - padding=padding, - group=groups, - has_bn=True, - activation='relu') - - def construct(self, x): - x = self.conv(x) - return x - - -class InvertedResidual(nn.Cell): - """ - Mobilenetv2 residual block definition. - - Args: - inp (int): Input channel. - oup (int): Output channel. - stride (int): Stride size for the first convolutional layer. Default: 1. - expand_ratio (int): expand ration of input channel - - Returns: - Tensor, output tensor. - - Examples: - >>> ResidualBlock(3, 256, 1, 1) - """ - - def __init__(self, inp, oup, stride, expand_ratio): - super(InvertedResidual, self).__init__() - assert stride in [1, 2] - - hidden_dim = int(round(inp * expand_ratio)) - self.use_res_connect = stride == 1 and inp == oup - - layers = [] - if expand_ratio != 1: - layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) - layers.extend([ - # dw - ConvBNReLU(hidden_dim, hidden_dim, - stride=stride, groups=hidden_dim), - # pw-linear - nn.Conv2dBnAct(hidden_dim, oup, kernel_size=1, stride=1, - pad_mode='pad', padding=0, group=1, has_bn=True) - ]) - self.conv = nn.SequentialCell(layers) - self.add = P.Add() - - def construct(self, x): - out = self.conv(x) - if self.use_res_connect: - out = self.add(out, x) - return out - - -class mobilenetV2(nn.Cell): - """ - mobilenetV2 fusion architecture. - - Args: - class_num (Cell): number of classes. - width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1. - has_dropout (bool): Is dropout used. Default is false - inverted_residual_setting (list): Inverted residual settings. Default is None - round_nearest (list): Channel round to . Default is 8 - Returns: - Tensor, output tensor. 
- - Examples: - >>> mobilenetV2(num_classes=1000) - """ - - def __init__(self, num_classes=1000, width_mult=1., - has_dropout=False, inverted_residual_setting=None, round_nearest=8): - super(mobilenetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - # setting of inverted residual blocks - self.cfgs = inverted_residual_setting - if inverted_residual_setting is None: - self.cfgs = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 320, 1, 1], - ] - - # building first layer - input_channel = _make_divisible( - input_channel * width_mult, round_nearest) - self.out_channels = _make_divisible( - last_channel * max(1.0, width_mult), round_nearest) - - features = [ConvBNReLU(3, input_channel, stride=2)] - # building inverted residual blocks - for t, c, n, s in self.cfgs: - output_channel = _make_divisible(c * width_mult, round_nearest) - for i in range(n): - stride = s if i == 0 else 1 - features.append( - block(input_channel, output_channel, stride, expand_ratio=t)) - input_channel = output_channel - # building last several layers - features.append(ConvBNReLU( - input_channel, self.out_channels, kernel_size=1)) - # make it nn.CellList - self.features = nn.SequentialCell(features) - # mobilenet head - head = ([GlobalAvgPooling(), - nn.DenseBnAct(self.out_channels, num_classes, - has_bias=True, has_bn=False) - ] if not has_dropout else - [GlobalAvgPooling(), - nn.Dropout(0.2), - nn.DenseBnAct(self.out_channels, num_classes, - has_bias=True, has_bn=False) - ]) - self.head = nn.SequentialCell(head) - - # init weights - self.init_parameters_data() - self._initialize_weights() - - def construct(self, x): - x = self.features(x) - x = self.head(x) - return x - - def _initialize_weights(self): - """ - Initialize weights. - - Args: - - Returns: - None. - - Examples: - >>> _initialize_weights() - """ - self.init_parameters_data() - for _, m in self.cells_and_names(): - np.random.seed(1) - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - w = Tensor(np.random.normal(0, np.sqrt(2. / n), - m.weight.data.shape).astype("float32")) - m.weight.set_data(w) - if m.bias is not None: - m.bias.set_data( - Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) - elif isinstance(m, nn.Conv2dBnAct): - n = m.conv.kernel_size[0] * \ - m.conv.kernel_size[1] * m.conv.out_channels - w = Tensor(np.random.normal(0, np.sqrt(2. 
/ n), - m.conv.weight.data.shape).astype("float32")) - m.conv.weight.set_data(w) - if m.conv.bias is not None: - m.conv.bias.set_data( - Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32"))) - elif isinstance(m, nn.BatchNorm2d): - m.gamma.set_data( - Tensor(np.ones(m.gamma.data.shape, dtype="float32"))) - m.beta.set_data( - Tensor(np.zeros(m.beta.data.shape, dtype="float32"))) - elif isinstance(m, nn.Dense): - m.weight.set_data(Tensor(np.random.normal( - 0, 0.01, m.weight.data.shape).astype("float32"))) - if m.bias is not None: - m.bias.set_data( - Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) - elif isinstance(m, nn.DenseBnAct): - m.dense.weight.set_data( - Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32"))) - if m.dense.bias is not None: - m.dense.bias.set_data( - Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32"))) diff --git a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py b/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py deleted file mode 100644 index 90b4162d54c..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Train Mobilenetv2_quant on Cifar10""" - - -import pytest -import numpy as np -from easydict import EasyDict as ed - -from mindspore import context -from mindspore import Tensor -from mindspore import nn -from mindspore.train.model import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore.common import set_seed - -from dataset import create_dataset -from lr_generator import get_lr -from utils import Monitor, CrossEntropyWithLabelSmooth -from mobilenetV2 import mobilenetV2 - -config_ascend_quant = ed({ - "num_classes": 10, - "image_height": 224, - "image_width": 224, - "batch_size": 200, - "step_threshold": 10, - "data_load_mode": "mindata", - "epoch_size": 1, - "start_epoch": 200, - "warmup_epochs": 1, - "lr": 0.3, - "momentum": 0.9, - "weight_decay": 4e-5, - "label_smooth": 0.1, - "loss_scale": 1024, - "save_checkpoint": True, - "save_checkpoint_epochs": 1, - "keep_checkpoint_max": 300, - "save_checkpoint_path": "./checkpoint", -}) - -dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/" - - -def train(): - """train""" - config = config_ascend_quant - print("training configure: {}".format(config)) - - epoch_size = config.epoch_size - - # define network - network = mobilenetV2(num_classes=config.num_classes) - # define loss - if config.label_smooth > 0: - loss = CrossEntropyWithLabelSmooth( - smooth_factor=config.label_smooth, num_classes=config.num_classes) - else: - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - # define dataset - dataset = create_dataset(dataset_path=dataset_path, - config=config, - repeat_num=1, - batch_size=config.batch_size) - step_size = dataset.get_dataset_size() - - # convert fusion network to quantization aware network - quantizer = QuantizationAwareTraining(bn_fold=True, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # get learning rate - lr = Tensor(get_lr(global_step=config.start_epoch * step_size, - lr_init=0, - lr_end=0, - lr_max=config.lr, - warmup_epochs=config.warmup_epochs, - total_epochs=epoch_size + config.start_epoch, - steps_per_epoch=step_size)) - - # define optimization - opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum, - config.weight_decay) - # define model - model = Model(network, loss_fn=loss, optimizer=opt) - - print("============== Starting Training ==============") - monitor = Monitor(lr_init=lr.asnumpy(), - step_threshold=config.step_threshold) - callback = [monitor] - model.train(epoch_size, dataset, callbacks=callback, - dataset_sink_mode=False) - print("============== End Training ==============") - - export_time_used = 650 - train_time = monitor.step_mseconds - print('train_time_used:{}'.format(train_time)) - assert train_time < export_time_used - expect_avg_step_loss = 2.32 - avg_step_loss = np.mean(np.array(monitor.losses)) - print("average step loss:{}".format(avg_step_loss)) - assert avg_step_loss < expect_avg_step_loss - - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_single -def test_mobilenetv2_quant(): - """ - test_mobilenetv2_quant - Features: test_mobilenetv2_quant - Description: test_mobilenetv2_quant graph mode - Expectation: None - """ - set_seed(1) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - train() - - -if __name__ == '__main__': - 
test_mobilenetv2_quant() diff --git a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py b/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py deleted file mode 100644 index a6dfca916f8..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Train Mobilenetv2_quant gpu on Cifar10""" - - -import pytest -import numpy as np -from easydict import EasyDict as ed - -from mindspore import context -from mindspore import Tensor -from mindspore import nn -from mindspore.train.model import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore.common import set_seed - -from dataset import create_dataset -from lr_generator import get_lr -from utils import Monitor, CrossEntropyWithLabelSmooth -from mobilenetV2 import mobilenetV2 - -config_ascend_quant = ed({ - "num_classes": 10, - "image_height": 224, - "image_width": 224, - "batch_size": 300, - "step_threshold": 10, - "data_load_mode": "mindata", - "epoch_size": 1, - "start_epoch": 200, - "warmup_epochs": 1, - "lr": 0.05, - "momentum": 0.997, - "weight_decay": 4e-5, - "label_smooth": 0.1, - "loss_scale": 1024, - "save_checkpoint": True, - "save_checkpoint_epochs": 1, - "keep_checkpoint_max": 300, - "save_checkpoint_path": "./checkpoint", -}) - -dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/" - -@pytest.mark.level2 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_single -def test_mobilenetv2_quant(): - set_seed(1) - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - config = config_ascend_quant - print("training configure: {}".format(config)) - - epoch_size = config.epoch_size - - # define network - network = mobilenetV2(num_classes=config.num_classes) - # define loss - if config.label_smooth > 0: - loss = CrossEntropyWithLabelSmooth( - smooth_factor=config.label_smooth, num_classes=config.num_classes) - else: - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - # define dataset - dataset = create_dataset(dataset_path=dataset_path, - config=config, - repeat_num=1, - batch_size=config.batch_size) - step_size = dataset.get_dataset_size() - - # convert fusion network to quantization aware network - quantizer = QuantizationAwareTraining(bn_fold=True, - per_channel=[True, False], - symmetric=[False, False]) - network = quantizer.quantize(network) - - # get learning rate - lr = Tensor(get_lr(global_step=config.start_epoch * step_size, - lr_init=0, - lr_end=0, - lr_max=config.lr, - warmup_epochs=config.warmup_epochs, - total_epochs=epoch_size + config.start_epoch, - steps_per_epoch=step_size)) - - # define optimization - opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum, - config.weight_decay) - # define model - model = Model(network, 
loss_fn=loss, optimizer=opt) - - print("============== Starting Training ==============") - monitor = Monitor(lr_init=lr.asnumpy(), - step_threshold=config.step_threshold) - callback = [monitor] - model.train(epoch_size, dataset, callbacks=callback, - dataset_sink_mode=False) - print("============== End Training ==============") - train_time = monitor.step_mseconds - print('train_time_used:{}'.format(train_time)) - avg_step_loss = np.mean(np.array(monitor.losses)) - print("average step loss:{}".format(avg_step_loss)) - expect_avg_step_loss = 2.32 - assert avg_step_loss < expect_avg_step_loss - export_time_used = 960 - assert train_time < export_time_used - -if __name__ == '__main__': - test_mobilenetv2_quant() diff --git a/tests/st/quantization/mobilenetv2_quant/utils.py b/tests/st/quantization/mobilenetv2_quant/utils.py deleted file mode 100644 index 2849cb211e3..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/utils.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""MobileNetV2 utils""" - -import time -import numpy as np - -from mindspore.train.callback import Callback -from mindspore import Tensor -from mindspore import nn -from mindspore.nn.loss.loss import LossBase -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.common import dtype as mstype - - -class Monitor(Callback): - """ - Monitor loss and time. 
- - Args: - lr_init (numpy array): train lr - - Returns: - None - - Examples: - >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy()) - """ - - def __init__(self, lr_init=None, step_threshold=10): - super(Monitor, self).__init__() - self.lr_init = lr_init - self.lr_init_len = len(lr_init) - self.step_threshold = step_threshold - self.step_mseconds = 50000 - - def epoch_begin(self, run_context): - self.losses = [] - self.epoch_time = time.time() - - def epoch_end(self, run_context): - cb_params = run_context.original_args() - - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / cb_params.batch_num - print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds, - per_step_mseconds, - np.mean(self.losses))) - self.epoch_mseconds = epoch_mseconds - - def step_begin(self, run_context): - self.step_time = time.time() - - def step_end(self, run_context): - cb_params = run_context.original_args() - step_mseconds = (time.time() - self.step_time) * 1000 - self.step_mseconds = min(self.step_mseconds, step_mseconds) - step_loss = cb_params.net_outputs - - if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): - step_loss = step_loss[0] - if isinstance(step_loss, Tensor): - step_loss = np.mean(step_loss.asnumpy()) - - self.losses.append(step_loss) - cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num - - print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format( - cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch + - 1, cb_params.batch_num, step_loss, - np.mean(self.losses), self.step_mseconds, self.lr_init[cb_params.cur_step_num - 1])) - - if cb_params.cur_step_num == self.step_threshold: - run_context.request_stop() - - -class CrossEntropyWithLabelSmooth(LossBase): - """ - CrossEntropyWith LabelSmooth. - - Args: - smooth_factor (float): smooth factor, default=0. - num_classes (int): num classes - - Returns: - None. 
- - Examples: - >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000) - """ - - def __init__(self, smooth_factor=0., num_classes=1000): - super(CrossEntropyWithLabelSmooth, self).__init__() - self.onehot = P.OneHot() - self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) - self.off_value = Tensor(1.0 * smooth_factor / - (num_classes - 1), mstype.float32) - self.ce = nn.SoftmaxCrossEntropyWithLogits() - self.mean = P.ReduceMean(False) - self.cast = P.Cast() - - def construct(self, logit, label): - one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1], - self.on_value, self.off_value) - out_loss = self.ce(logit, one_hot_label) - out_loss = self.mean(out_loss, 0) - return out_loss diff --git a/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py b/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py index 10e9054f3c5..f8b3507fa5b 100644 --- a/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py +++ b/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py @@ -22,20 +22,18 @@ from mindspore import nn from mindspore import context from mindspore import Tensor from mindspore.common import set_seed -from mindspore.compression.quant import create_quant_config class Net(nn.Cell): - def __init__(self, qconfig): + def __init__(self): super(Net, self).__init__() - self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), - pad_mode='valid', quant_config=qconfig) + self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), pad_mode='valid') def construct(self, x): return self.conv(x) + def test_conv2d_bn_fold_quant(): set_seed(1) - quant_config = create_quant_config() - network = Net(quant_config) + network = Net() inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32)) label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32)) opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9) @@ -44,11 +42,13 @@ def test_conv2d_bn_fold_quant(): train_network = nn.TrainOneStepCell(net_with_loss, opt) train_network.set_train() out_loss = train_network(inputs, label) + print("------------------", out_loss.asnumpy()) expect_loss = np.array([0.940427]) error = np.array([0.1]) diff = out_loss.asnumpy() - expect_loss assert np.all(abs(diff) < error) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training diff --git a/tests/st/quantization/resnet50_quant/dataset.py b/tests/st/quantization/resnet50_quant/dataset.py deleted file mode 100755 index 69a1f774819..00000000000 --- a/tests/st/quantization/resnet50_quant/dataset.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" create train dataset. 
""" - -from functools import partial - -import mindspore.common.dtype as mstype -import mindspore.dataset as ds -import mindspore.dataset.transforms as C2 -import mindspore.dataset.vision as C - - -def create_dataset(dataset_path, config, repeat_num=1, batch_size=32): - """ - create a train dataset - - Args: - dataset_path(string): the path of dataset. - config(EasyDict):the basic config for training - repeat_num(int): the repeat times of dataset. Default: 1. - batch_size(int): the batch size of dataset. Default: 32. - - Returns: - dataset - """ - - load_func = partial(ds.Cifar10Dataset, dataset_path) - data_set = load_func(num_parallel_workers=8, shuffle=False) - - resize_height = config.image_height - resize_width = config.image_width - - mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] - std = [0.229 * 255, 0.224 * 255, 0.225 * 255] - - # define map operations - resize_op = C.Resize((resize_height, resize_width)) - normalize_op = C.Normalize(mean=mean, std=std) - changeswap_op = C.HWC2CHW() - c_trans = [resize_op, normalize_op, changeswap_op] - - type_cast_op = C2.TypeCast(mstype.int32) - - data_set = data_set.map(operations=c_trans, input_columns="image", - num_parallel_workers=8) - data_set = data_set.map(operations=type_cast_op, - input_columns="label", num_parallel_workers=8) - - # apply batch operations - data_set = data_set.batch(batch_size, drop_remainder=True) - - # apply dataset repeat operation - data_set = data_set.repeat(repeat_num) - - return data_set diff --git a/tests/st/quantization/resnet50_quant/lr_generator.py b/tests/st/quantization/resnet50_quant/lr_generator.py deleted file mode 100755 index fe2a971ebfc..00000000000 --- a/tests/st/quantization/resnet50_quant/lr_generator.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""learning rate generator""" - -import math -import numpy as np - - -def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode): - """ - generate learning rate array - - Args: - lr_init(float): init learning rate - lr_end(float): end learning rate - lr_max(float): max learning rate - warmup_epochs(int): number of warmup epochs - total_epochs(int): total epoch of training - steps_per_epoch(int): steps of one epoch - lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default - - Returns: - np.array, learning rate array - """ - lr_each_step = [] - total_steps = steps_per_epoch * total_epochs - warmup_steps = steps_per_epoch * warmup_epochs - if lr_decay_mode == 'steps': - decay_epoch_index = [0.3 * total_steps, - 0.6 * total_steps, 0.8 * total_steps] - for i in range(total_steps): - if i < decay_epoch_index[0]: - lr = lr_max - elif i < decay_epoch_index[1]: - lr = lr_max * 0.1 - elif i < decay_epoch_index[2]: - lr = lr_max * 0.01 - else: - lr = lr_max * 0.001 - lr_each_step.append(lr) - elif lr_decay_mode == 'poly': - if warmup_steps != 0: - inc_each_step = (float(lr_max) - float(lr_init)) / \ - float(warmup_steps) - else: - inc_each_step = 0 - for i in range(total_steps): - if i < warmup_steps: - lr = float(lr_init) + inc_each_step * float(i) - else: - base = (1.0 - (float(i) - float(warmup_steps)) / - (float(total_steps) - float(warmup_steps))) - lr = float(lr_max) * base * base - if lr < 0.0: - lr = 0.0 - lr_each_step.append(lr) - elif lr_decay_mode == 'cosine': - decay_steps = total_steps - warmup_steps - for i in range(total_steps): - if i < warmup_steps: - lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps) - lr = float(lr_init) + lr_inc * (i + 1) - else: - linear_decay = (total_steps - i) / decay_steps - cosine_decay = 0.5 * \ - (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps)) - decayed = linear_decay * cosine_decay + 0.00001 - lr = lr_max * decayed - lr_each_step.append(lr) - else: - for i in range(total_steps): - if i < warmup_steps: - lr = lr_init + (lr_max - lr_init) * i / warmup_steps - else: - lr = lr_max - (lr_max - lr_end) * \ - (i - warmup_steps) / (total_steps - warmup_steps) - lr_each_step.append(lr) - - learning_rate = np.array(lr_each_step).astype(np.float32) - - return learning_rate diff --git a/tests/st/quantization/resnet50_quant/resnet_quant_manual.py b/tests/st/quantization/resnet50_quant/resnet_quant_manual.py deleted file mode 100644 index 32693785c24..00000000000 --- a/tests/st/quantization/resnet50_quant/resnet_quant_manual.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""ResNet.""" -import numpy as np -import mindspore.nn as nn -import mindspore.common.initializer as weight_init -from mindspore.ops import operations as P -from mindspore import Tensor -from mindspore.nn import FakeQuantWithMinMaxObserver, Conv2dBnFoldQuant -from mindspore.compression.quant import create_quant_config - -_ema_decay = 0.999 -_symmetric = True -_fake = True -_per_channel = True -_quant_config = create_quant_config(per_channel=(_per_channel, False), symmetric=(_symmetric, False)) - - -def _weight_variable(shape, factor=0.01): - init_value = np.random.randn(*shape).astype(np.float32) * factor - return Tensor(init_value) - - -def _conv3x3(in_channel, out_channel, stride=1): - weight_shape = (out_channel, in_channel, 3, 3) - weight = _weight_variable(weight_shape) - return nn.Conv2d(in_channel, out_channel, - kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight) - - -def _conv1x1(in_channel, out_channel, stride=1): - weight_shape = (out_channel, in_channel, 1, 1) - weight = _weight_variable(weight_shape) - return nn.Conv2d(in_channel, out_channel, - kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight) - - -def _conv7x7(in_channel, out_channel, stride=1): - weight_shape = (out_channel, in_channel, 7, 7) - weight = _weight_variable(weight_shape) - return nn.Conv2d(in_channel, out_channel, - kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) - - -def _bn(channel): - return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, - gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) - - -def _bn_last(channel): - return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, - gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1) - - -def _fc(in_channel, out_channel): - weight_shape = (out_channel, in_channel) - weight = _weight_variable(weight_shape) - return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) - - -class ConvBNReLU(nn.Cell): - """ - Convolution/Depthwise fused with Batchnorm and ReLU block definition. - - Args: - in_planes (int): Input channel. - out_planes (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size for the first convolutional layer. Default: 1. - groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. - - Returns: - Tensor, output tensor. - - Examples: - >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) - """ - - def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - super(ConvBNReLU, self).__init__() - padding = (kernel_size - 1) // 2 - conv = Conv2dBnFoldQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding, - group=groups, fake=_fake, quant_config=_quant_config) - layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()] - self.features = nn.SequentialCell(layers) - - def construct(self, x): - output = self.features(x) - return output - - -class ResidualBlock(nn.Cell): - """ - ResNet V1 residual block definition. - - Args: - in_channel (int): Input channel. - out_channel (int): Output channel. - stride (int): Stride size for the first convolutional layer. Default: 1. - - Returns: - Tensor, output tensor. 
- - Examples: - >>> ResidualBlock(3, 256, stride=2) - """ - expansion = 4 - - def __init__(self, - in_channel, - out_channel, - stride=1): - super(ResidualBlock, self).__init__() - - channel = out_channel // self.expansion - self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1) - self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride) - self.conv3 = nn.SequentialCell([Conv2dBnFoldQuant(channel, out_channel, fake=_fake, - quant_config=_quant_config, - kernel_size=1, stride=1, pad_mode='same', padding=0), - FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, symmetric=False) - ]) if _fake else Conv2dBnFoldQuant(channel, out_channel, fake=_fake, - quant_config=_quant_config, - kernel_size=1, stride=1, - pad_mode='same', padding=0) - - self.down_sample = False - - if stride != 1 or in_channel != out_channel: - self.down_sample = True - self.down_sample_layer = None - - if self.down_sample: - self.down_sample_layer = nn.SequentialCell([Conv2dBnFoldQuant(in_channel, out_channel, - quant_config=_quant_config, - kernel_size=1, stride=stride, - pad_mode='same', padding=0), - FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, - symmetric=False) - ]) if _fake else Conv2dBnFoldQuant(in_channel, out_channel, - fake=_fake, - quant_config=_quant_config, - kernel_size=1, - stride=stride, - pad_mode='same', - padding=0) - self.add = nn.TensorAddQuant() - self.relu = P.ReLU() - - def construct(self, x): - identity = x - out = self.conv1(x) - out = self.conv2(out) - out = self.conv3(out) - - if self.down_sample: - identity = self.down_sample_layer(identity) - - out = self.add(out, identity) - out = self.relu(out) - - return out - - -class ResNet(nn.Cell): - """ - ResNet architecture. - - Args: - block (Cell): Block for network. - layer_nums (list): Numbers of block in different layers. - in_channels (list): Input channel in each layer. - out_channels (list): Output channel in each layer. - strides (list): Stride size in each layer. - num_classes (int): The number of classes that the training images are belonging to. - Returns: - Tensor, output tensor. 
- - Examples: - >>> ResNet(ResidualBlock, - >>> [3, 4, 6, 3], - >>> [64, 256, 512, 1024], - >>> [256, 512, 1024, 2048], - >>> [1, 2, 2, 2], - >>> 10) - """ - - def __init__(self, - block, - layer_nums, - in_channels, - out_channels, - strides, - num_classes): - super(ResNet, self).__init__() - - if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: - raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") - - self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") - - self.layer1 = self._make_layer(block, - layer_nums[0], - in_channel=in_channels[0], - out_channel=out_channels[0], - stride=strides[0]) - self.layer2 = self._make_layer(block, - layer_nums[1], - in_channel=in_channels[1], - out_channel=out_channels[1], - stride=strides[1]) - self.layer3 = self._make_layer(block, - layer_nums[2], - in_channel=in_channels[2], - out_channel=out_channels[2], - stride=strides[2]) - self.layer4 = self._make_layer(block, - layer_nums[3], - in_channel=in_channels[3], - out_channel=out_channels[3], - stride=strides[3]) - - self.mean = P.ReduceMean(keep_dims=True) - self.flatten = nn.Flatten() - self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, quant_config=_quant_config) - self.output_fake = nn.FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay) - - # init weights - self._initialize_weights() - - def _make_layer(self, block, layer_num, in_channel, out_channel, stride): - """ - Make stage network of ResNet. - - Args: - block (Cell): Resnet block. - layer_num (int): Layer number. - in_channel (int): Input channel. - out_channel (int): Output channel. - stride (int): Stride size for the first convolutional layer. - - Returns: - SequentialCell, the output layer. - - Examples: - >>> _make_layer(ResidualBlock, 3, 128, 256, 2) - """ - layers = [] - - resnet_block = block(in_channel, out_channel, stride=stride) - layers.append(resnet_block) - - for _ in range(1, layer_num): - resnet_block = block(out_channel, out_channel, stride=1) - layers.append(resnet_block) - - return nn.SequentialCell(layers) - - def construct(self, x): - x = self.conv1(x) - c1 = self.maxpool(x) - - c2 = self.layer1(c1) - c3 = self.layer2(c2) - c4 = self.layer3(c3) - c5 = self.layer4(c4) - - out = self.mean(c5, (2, 3)) - out = self.flatten(out) - out = self.end_point(out) - out = self.output_fake(out) - return out - - def _initialize_weights(self): - - self.init_parameters_data() - for _, m in self.cells_and_names(): - np.random.seed(1) - - if isinstance(m, nn.Conv2dBnFoldQuant): - m.weight.set_data(weight_init.initializer(weight_init.Normal(), - m.weight.shape, - m.weight.dtype)) - elif isinstance(m, nn.DenseQuant): - m.weight.set_data(weight_init.initializer(weight_init.Normal(), - m.weight.shape, - m.weight.dtype)) - elif isinstance(m, nn.Conv2dBnWithoutFoldQuant): - m.weight.set_data(weight_init.initializer(weight_init.Normal(), - m.weight.shape, - m.weight.dtype)) - - -def resnet50_quant(class_num=10): - """ - Get ResNet50 neural network. - - Args: - class_num (int): Class number. - - Returns: - Cell, cell instance of ResNet50 neural network. - - Examples: - >>> net = resnet50_quant(10) - """ - return ResNet(ResidualBlock, - [3, 4, 6, 3], - [64, 256, 512, 1024], - [256, 512, 1024, 2048], - [1, 2, 2, 2], - class_num) - - -def resnet101_quant(class_num=1001): - """ - Get ResNet101 neural network. - - Args: - class_num (int): Class number. 
- - Returns: - Cell, cell instance of ResNet101 neural network. - - Examples: - >>> net = resnet101(1001) - """ - return ResNet(ResidualBlock, - [3, 4, 23, 3], - [64, 256, 512, 1024], - [256, 512, 1024, 2048], - [1, 2, 2, 2], - class_num) diff --git a/tests/st/quantization/resnet50_quant/test_resnet50_quant.py b/tests/st/quantization/resnet50_quant/test_resnet50_quant.py deleted file mode 100755 index d5a2971de23..00000000000 --- a/tests/st/quantization/resnet50_quant/test_resnet50_quant.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Train Resnet50_quant on Cifar10""" - -import pytest -import numpy as np -from easydict import EasyDict as ed - -from mindspore import context -from mindspore import Tensor -from mindspore.nn.optim.momentum import Momentum -from mindspore.train.model import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore import set_seed - -from resnet_quant_manual import resnet50_quant -from dataset import create_dataset -from lr_generator import get_lr -from utils import Monitor, CrossEntropy - - -config_quant = ed({ - "class_num": 10, - "batch_size": 128, - "step_threshold": 20, - "loss_scale": 1024, - "momentum": 0.9, - "weight_decay": 1e-4, - "epoch_size": 1, - "pretrained_epoch_size": 90, - "buffer_size": 1000, - "image_height": 224, - "image_width": 224, - "data_load_mode": "original", - "save_checkpoint": True, - "save_checkpoint_epochs": 1, - "keep_checkpoint_max": 50, - "save_checkpoint_path": "./", - "warmup_epochs": 0, - "lr_decay_mode": "cosine", - "use_label_smooth": True, - "label_smooth_factor": 0.1, - "lr_init": 0, - "lr_max": 0.005, -}) - -dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/" - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_resnet50_quant(): - set_seed(1) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - config = config_quant - print("training configure: {}".format(config)) - epoch_size = config.epoch_size - - # define network - net = resnet50_quant(class_num=config.class_num) - net.set_train(True) - - # define loss - if not config.use_label_smooth: - config.label_smooth_factor = 0.0 - loss = CrossEntropy( - smooth_factor=config.label_smooth_factor, num_classes=config.class_num) - #loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) - - # define dataset - dataset = create_dataset(dataset_path=dataset_path, - config=config, - repeat_num=1, - batch_size=config.batch_size) - step_size = dataset.get_dataset_size() - - # convert fusion network to quantization aware network - quantizer = QuantizationAwareTraining(bn_fold=True, - per_channel=[True, False], - symmetric=[True, False]) - net = quantizer.quantize(net) - - # get learning rate - lr = Tensor(get_lr(lr_init=config.lr_init, - 
lr_end=0.0, - lr_max=config.lr_max, - warmup_epochs=config.warmup_epochs, - total_epochs=config.epoch_size, - steps_per_epoch=step_size, - lr_decay_mode='cosine')) - - # define optimization - opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, - config.weight_decay, config.loss_scale) - - # define model - #model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}) - model = Model(net, loss_fn=loss, optimizer=opt) - - print("============== Starting Training ==============") - monitor = Monitor(lr_init=lr.asnumpy(), - step_threshold=config.step_threshold) - - callbacks = [monitor] - model.train(epoch_size, dataset, callbacks=callbacks, - dataset_sink_mode=False) - print("============== End Training ==============") - - expect_avg_step_loss = 2.60 - avg_step_loss = np.mean(np.array(monitor.losses)) - - print("average step loss:{}".format(avg_step_loss)) - assert avg_step_loss < expect_avg_step_loss - - -if __name__ == '__main__': - test_resnet50_quant() diff --git a/tests/st/quantization/resnet50_quant/utils.py b/tests/st/quantization/resnet50_quant/utils.py deleted file mode 100644 index e3f3c26d864..00000000000 --- a/tests/st/quantization/resnet50_quant/utils.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Resnet50 utils""" - -import time -import numpy as np - -from mindspore.train.callback import Callback -from mindspore import Tensor -from mindspore import nn -from mindspore.nn.loss.loss import LossBase -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.common import dtype as mstype - - -class Monitor(Callback): - """ - Monitor loss and time. 
- - Args: - lr_init (numpy array): train lr - - Returns: - None - - Examples: - >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy()) - """ - - def __init__(self, lr_init=None, step_threshold=10): - super(Monitor, self).__init__() - self.lr_init = lr_init - self.lr_init_len = len(lr_init) - self.step_threshold = step_threshold - - def epoch_begin(self, run_context): - self.losses = [] - self.epoch_time = time.time() - - def epoch_end(self, run_context): - cb_params = run_context.original_args() - - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / cb_params.batch_num - print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds, - per_step_mseconds, - np.mean(self.losses))) - self.epoch_mseconds = epoch_mseconds - - def step_begin(self, run_context): - self.step_time = time.time() - - def step_end(self, run_context): - cb_params = run_context.original_args() - step_mseconds = (time.time() - self.step_time) * 1000 - step_loss = cb_params.net_outputs - - if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): - step_loss = step_loss[0] - if isinstance(step_loss, Tensor): - step_loss = np.mean(step_loss.asnumpy()) - - self.losses.append(step_loss) - cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num - - print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format( - cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch + - 1, cb_params.batch_num, step_loss, - np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1])) - - if cb_params.cur_step_num == self.step_threshold: - run_context.request_stop() - - -class CrossEntropy(LossBase): - """the redefined loss function with SoftmaxCrossEntropyWithLogits""" - - def __init__(self, smooth_factor=0, num_classes=1001): - super(CrossEntropy, self).__init__() - self.onehot = P.OneHot() - self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) - self.off_value = Tensor(1.0 * smooth_factor / - (num_classes - 1), mstype.float32) - self.ce = nn.SoftmaxCrossEntropyWithLogits() - self.mean = P.ReduceMean(False) - - def construct(self, logit, label): - one_hot_label = self.onehot(label, F.shape( - logit)[1], self.on_value, self.off_value) - loss = self.ce(logit, one_hot_label) - loss = self.mean(loss, 0) - return loss
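
For reference, the user-facing flow that this patch retires is captured by the deleted tests/st/quantization/resnet50_quant/test_resnet50_quant.py. The sketch below condenses that test into one runnable script; note it is only a sketch of the removed path, it assumes a MindSpore build from before this change (one that still ships mindspore.compression), assumes the deleted helper modules resnet_quant_manual, dataset, lr_generator and utils from that test directory are on the Python path, and the CIFAR-10 path is specific to the original test environment.

    # Condensed from the deleted test_resnet50_quant.py; requires a pre-removal
    # MindSpore build and the deleted helpers from tests/st/quantization/resnet50_quant/.
    from easydict import EasyDict as ed
    from mindspore import Tensor, context, set_seed
    from mindspore.nn.optim.momentum import Momentum
    from mindspore.train.model import Model
    from mindspore.compression.quant import QuantizationAwareTraining  # removed by this patch

    from resnet_quant_manual import resnet50_quant  # deleted test helper
    from dataset import create_dataset              # deleted test helper
    from lr_generator import get_lr                 # deleted test helper
    from utils import Monitor, CrossEntropy         # deleted test helper

    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

    # Manually quantized fusion network plus label-smoothing loss, as in the deleted test.
    net = resnet50_quant(class_num=10)
    net.set_train(True)
    loss = CrossEntropy(smooth_factor=0.1, num_classes=10)

    # The deleted create_dataset() only reads image_height/image_width from the config.
    config = ed({"image_height": 224, "image_width": 224})
    dataset = create_dataset(dataset_path="/home/workspace/mindspore_dataset/cifar-10-batches-bin/",
                             config=config, repeat_num=1, batch_size=128)
    step_size = dataset.get_dataset_size()

    # The removed API: rewrite the fusion network into a fake-quantized one.
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    net = quantizer.quantize(net)

    # Cosine-decayed learning rate, Momentum optimizer, then a short monitored training run.
    lr = Tensor(get_lr(lr_init=0.0, lr_end=0.0, lr_max=0.005, warmup_epochs=0,
                       total_epochs=1, steps_per_epoch=step_size,
                       lr_decay_mode='cosine'))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr, 0.9, 1e-4, 1024)
    model = Model(net, loss_fn=loss, optimizer=opt)
    monitor = Monitor(lr_init=lr.asnumpy(), step_threshold=20)
    model.train(1, dataset, callbacks=[monitor], dataset_sink_mode=False)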
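
The deleted lr_generator.py built the whole schedule up front as a NumPy array with one entry per global step. A minimal, self-contained reproduction of just its 'cosine' branch (linear warmup followed by a linearly damped cosine decay with a small floor) is sketched below; the other branches ('steps', 'poly' and the default linear decay) followed the same array-building pattern. The function name cosine_lr is introduced here for illustration only.

    import math
    import numpy as np

    def cosine_lr(lr_init, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
        """Rebuild of the 'cosine' branch of the deleted get_lr()."""
        total_steps = steps_per_epoch * total_epochs
        warmup_steps = steps_per_epoch * warmup_epochs
        decay_steps = total_steps - warmup_steps
        lr_each_step = []
        for i in range(total_steps):
            if i < warmup_steps:
                # Linear warmup from lr_init to lr_max.
                lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
                lr = float(lr_init) + lr_inc * (i + 1)
            else:
                # Linearly damped cosine decay, with a 1e-5 floor factor.
                linear_decay = (total_steps - i) / decay_steps
                cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps))
                lr = lr_max * (linear_decay * cosine_decay + 0.00001)
            lr_each_step.append(lr)
        return np.array(lr_each_step).astype(np.float32)

    # Example: the schedule the deleted resnet50 test requested (lr_max=0.005, no warmup,
    # one epoch); 390 steps per epoch is a hypothetical value for illustration.
    schedule = cosine_lr(0.0, 0.005, 0, 1, steps_per_epoch=390)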