!47033 remove compression and testcases in mindspore

Merge pull request !47033 from hangq/wood
This commit is contained in:
i-robot 2022-12-23 08:20:27 +00:00 committed by Gitee
commit b6653ab2d6
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
43 changed files with 7 additions and 6186 deletions

View File

@ -289,7 +289,6 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental

View File

@ -164,7 +164,6 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental

View File

@ -250,7 +250,6 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental

View File

@ -23,9 +23,6 @@ mindspore.export
- **kwargs** (dict) - 配置选项字典。 - **kwargs** (dict) - 配置选项字典。
- **quant_mode** (str) - 如果网络是量化感知训练网络,那么 `quant_mode` 需要设置为"QUANT",否则 `quant_mode` 需要设置为"NONQUANT"。
- **mean** (float) - 预处理后输入数据的平均值，用于量化网络的第一层。默认值：127.5。
- **std_dev** (float) - 预处理后输入数据的方差，用于量化网络的第一层。默认值：127.5。
- **enc_key** (str) - 用于加密的字节类型密钥有效长度为16、24或者32。 - **enc_key** (str) - 用于加密的字节类型密钥有效长度为16、24或者32。
- **enc_mode** (Union[str, function]) - 指定加密模式,当设置 `enc_key` 时启用。 - **enc_mode** (Union[str, function]) - 指定加密模式,当设置 `enc_key` 时启用。

View File

@ -168,8 +168,6 @@ PYBIND11_MODULE(_c_expression, m) {
"Get the number of parallel operators.") "Get the number of parallel operators.")
.def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"), .def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"),
"Get Allreduce Fusion Dictionary.") "Get Allreduce Fusion Dictionary.")
.def("fetch_info_for_quant_export", &GraphExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"),
"Fetch the inputs of Conv or Matmul for quant export.")
.def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"), .def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"),
"Build data graph.") "Build data graph.")
.def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"), .def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"),

View File

@ -631,122 +631,6 @@ GraphExecutorPy::~GraphExecutorPy() {
ConfigManager::GetInstance().ResetConfig(); ConfigManager::GetInstance().ResetConfig();
} }
void GraphExecutorPy::GetWeightInfo(
const CNodePtr &root_node, const AnfNodePtr &weight_node,
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const {
MS_EXCEPTION_IF_NULL(root_node);
MS_EXCEPTION_IF_NULL(fake_quant_table);
std::string weight_name;
auto x = root_node->input(1);
MS_EXCEPTION_IF_NULL(x);
if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
weight_name = weight_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
} else {
auto para = weight_node->cast_ptr<Parameter>();
MS_EXCEPTION_IF_NULL(para);
weight_name = para->name();
}
// find the fakequant from input
int64_t count = 0;
const int64_t max_depth = 5;
auto is_quant_cnode = [](const AnfNodePtr &node) {
return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
};
while (!is_quant_cnode(x)) {
if (count >= max_depth) {
break;
}
auto cnode = x->cast_ptr<CNode>();
if (cnode == nullptr || cnode->size() <= 1) {
break;
}
x = cnode->input(1);
count += 1;
}
if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
(*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
}
// get the fakequant parameter minq's name
if (!is_quant_cnode(x)) {
return;
}
auto cnode = x->cast_ptr<CNode>();
constexpr size_t expect_input_size = 4;
if (cnode == nullptr || cnode->IsApply(prim::kPrimLoad) || cnode->size() != expect_input_size) {
return;
}
const size_t fakequant_index = 2;
auto fakequant_min_node = cnode->input(fakequant_index);
if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
return;
}
std::string fakequant_min_node_name;
if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
fakequant_min_node_name = fakequant_min_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
} else {
auto param = fakequant_min_node->cast_ptr<Parameter>();
MS_EXCEPTION_IF_NULL(param);
fakequant_min_node_name = param->name();
}
auto quant_op = GetValuePtr<PrimitivePy>(cnode->input(0));
if (quant_op == nullptr) {
return;
}
(*fake_quant_table)[weight_name] = std::make_pair(quant_op->adapter(), fakequant_min_node_name);
}
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> GraphExecutorPy::FetchInfoForQuantExport(
const std::string &phase) {
FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
MS_EXCEPTION_IF_NULL(func_graph);
MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase << ")!";
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> fake_quant_table;
auto filter = [](const AnfNodePtr &node) {
return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) ||
IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative));
};
std::vector<AnfNodePtr> nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter);
auto is_quant_cnode = [](const AnfNodePtr &node) {
return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
};
const size_t root_node_size = 3;
const size_t weight_index = 2;
for (const auto &node : nodes) {
auto root_node = node->cast<CNodePtr>();
if (root_node == nullptr || root_node->size() != root_node_size) {
continue;
}
auto weight = root_node->input(weight_index);
if (!is_quant_cnode(weight)) {
auto tuple_node = weight->cast_ptr<CNode>();
if (tuple_node != nullptr) {
auto fake_node = tuple_node->input(1);
if (!is_quant_cnode(fake_node)) {
continue;
} else {
weight = fake_node;
}
}
}
// get parameter weight's name
auto cnode = weight->cast_ptr<CNode>();
MS_EXCEPTION_IF_NULL(cnode);
auto weight_node = cnode->input(weight_index);
MS_EXCEPTION_IF_NULL(weight_node);
if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
continue;
}
GetWeightInfo(root_node, weight_node, &fake_quant_table);
}
return fake_quant_table;
}
void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) { void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) {
// save the graph to GraphExecutorPy // save the graph to GraphExecutorPy
FuncGraphPtr func_graph = info_[phase]->resource->func_graph(); FuncGraphPtr func_graph = info_[phase]->resource->func_graph();

View File

@ -130,9 +130,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
void TerminateDebugger(); void TerminateDebugger();
#endif #endif
// Collect quantization-export info for the graph compiled under `phase`. The result maps a weight's
// parameter name to a pair of (fake-quant primitive adapter, parameter name or the literal "input").
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> FetchInfoForQuantExport(
const std::string &phase);
// Generate a key for mapping function graph // Generate a key for mapping function graph
py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false); py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false);
@ -140,8 +137,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
private: private:
GraphExecutorPy() = default; GraphExecutorPy() = default;
// Helper of FetchInfoForQuantExport: inspects input 1 of `root_node` and records the quantization info
// of `weight_node` in `fake_quant_table`, keyed by the weight's parameter name.
void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const;
void ParallelPostProcess(const string &phase); void ParallelPostProcess(const string &phase);
void GetGeBackendPolicy() const; void GetGeBackendPolicy() const;
// filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after

View File

@ -161,7 +161,6 @@
#include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h" #include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h"
#include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h"
#include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h" #include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h"
#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
#include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h"
#include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h"
#include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h" #include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h"
@ -667,8 +666,6 @@ void AscendUnifyMindIR(const std::shared_ptr<session::KernelGraph> &kernel_graph
unify_mindir_pm->AddPass(std::make_shared<opt::MomentumUnifyOutput>()); unify_mindir_pm->AddPass(std::make_shared<opt::MomentumUnifyOutput>());
unify_mindir_pm->AddPass(std::make_shared<opt::RMSPropUnifyOutput>()); unify_mindir_pm->AddPass(std::make_shared<opt::RMSPropUnifyOutput>());
unify_mindir_pm->AddPass(std::make_shared<opt::CenteredRMSPropUnifyOutput>()); unify_mindir_pm->AddPass(std::make_shared<opt::CenteredRMSPropUnifyOutput>());
unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerLayerGradUnifyMindIR>());
unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerChannelGradUnifyMindIR>());
auto ms_context = MsContext::GetInstance(); auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context); MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) { if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {

View File

@ -1,233 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
#include <vector>
#include <memory>
#include "include/common/utils/utils.h"
#include "utils/ms_context.h"
#include "backend/common/optimizer/helper.h"
#include "runtime/device/kernel_info.h"
#include "backend/common/session/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "utils/trace_base.h"
namespace mindspore {
namespace opt {
/// Create the FakeLearnedScaleQuantPerLayerGradD node that replaces the first half of the original
/// per-layer grad node, and split its outputs into `lsq_perlayer_grad_d_outputs`.
///
/// The new node takes inputs 1..4 of the original node, inherits its scope and its "neg_trunc"
/// attribute, and has two outputs that both use the inferred type and shape of the original output 0.
///
/// \param graph                       Graph that owns the new node; must not be null.
/// \param lsq_perlayer_grad_node      Original grad node; must not be null and must have at least
///                                    kFakeLearnedScaleQuantGradInputNum inputs.
/// \param lsq_perlayer_grad_d_outputs Receives the outputs of the new node; must not be null.
void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerGradD(
    const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
    std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
  // Validate the output pointer up front, as the reduce-grad sibling does for its own output.
  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d_outputs);
  const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
  if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
    MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
                      << trace::DumpSourceLines(lsq_perlayer_grad_node);
  }
  std::vector<AnfNodePtr> lsq_perlayer_grad_d_inputs = {
    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDOpName)),
    lsq_perlayer_grad_inputs[kIndex1], lsq_perlayer_grad_inputs[kIndex2], lsq_perlayer_grad_inputs[kIndex3],
    lsq_perlayer_grad_inputs[kIndex4]};
  auto lsq_perlayer_grad_d = NewCNode(lsq_perlayer_grad_d_inputs, graph);
  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d);
  lsq_perlayer_grad_d->set_scope(lsq_perlayer_grad_node->scope());

  // Both outputs of the D node reuse the type and shape of output 0 of the original node.
  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL),
                common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL)};
  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL),
                 common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL)};
  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_grad_d.get());
  common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perlayer_grad_node, lsq_perlayer_grad_d);
  CreateMultipleOutputsOfAnfNode(graph, lsq_perlayer_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
                                 lsq_perlayer_grad_d_outputs);
}
void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerReduceGrad(
const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs);
const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
}
if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_d_outputs has wrong inputs size, should be "
<< kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size()
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
}
std::vector<AnfNodePtr> lsq_perlayer_reduce_grad_inputs = {
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDReduceOpName)),
lsq_perlayer_grad_d_outputs[kIndex1]};
auto lsq_perlayer_reduce_grad = NewCNode(lsq_perlayer_reduce_grad_inputs, graph);
MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad);
lsq_perlayer_reduce_grad->set_scope(lsq_perlayer_grad_node->scope());
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 1UL)};
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 1UL)};
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_reduce_grad.get());
(*lsq_perlayer_reduce_grad_outputs).push_back(lsq_perlayer_reduce_grad);
}
/// Create the FakeLearnedScaleQuantPerChannelGradD node that replaces the first half of the original
/// per-channel grad node, and split its outputs into `lsq_perchannel_grad_d_outputs`.
///
/// The new node takes inputs 1..4 of the original node, inherits its scope and its "neg_trunc" and
/// "channel_axis" attributes, and has two outputs that both use the inferred type and shape of the
/// original output 0.
///
/// \param graph                         Graph that owns the new node; must not be null.
/// \param lsq_perchannel_grad_node      Original grad node; must not be null and must have at least
///                                      kFakeLearnedScaleQuantGradInputNum inputs.
/// \param lsq_perchannel_grad_d_outputs Receives the outputs of the new node; must not be null.
void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelGradD(
    const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
    std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
  // Validate the output pointer up front, as the reduce-grad sibling does for its own output.
  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d_outputs);
  const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
  if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
    MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
                      << trace::DumpSourceLines(lsq_perchannel_grad_node);
  }
  std::vector<AnfNodePtr> lsq_perchannel_grad_d_inputs = {
    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDOpName)),
    lsq_perchannel_grad_inputs[kIndex1], lsq_perchannel_grad_inputs[kIndex2], lsq_perchannel_grad_inputs[kIndex3],
    lsq_perchannel_grad_inputs[kIndex4]};
  auto lsq_perchannel_grad_d = NewCNode(lsq_perchannel_grad_d_inputs, graph);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d);
  lsq_perchannel_grad_d->set_scope(lsq_perchannel_grad_node->scope());

  // Both outputs of the D node reuse the type and shape of output 0 of the original node.
  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL),
                common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL)};
  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL),
                 common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL)};
  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_grad_d.get());
  common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
  common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
  CreateMultipleOutputsOfAnfNode(graph, lsq_perchannel_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
                                 lsq_perchannel_grad_d_outputs);
}
/// Create the FakeLearnedScaleQuantPerChannelGradDReduce node and append it to
/// `lsq_perchannel_reduce_grad_outputs`.
///
/// The reduce node consumes output 1 of the GradD node, inherits the scope and the "channel_axis"
/// attribute of the original grad node, and takes the inferred type and shape of the original node's
/// output 1.
///
/// \param graph                              Graph that owns the new node; must not be null.
/// \param lsq_perchannel_grad_node           Original grad node; must not be null.
/// \param lsq_perchannel_grad_d_outputs      Outputs of the GradD node; exactly
///                                           kFakeLearnedScaleQuantGradDOutputNum entries are required.
/// \param lsq_perchannel_reduce_grad_outputs Receives the new reduce node; must not be null.
void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelReduceGrad(
    const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
    const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
    std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs);
  const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
  if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
    MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
                      << trace::DumpSourceLines(lsq_perchannel_grad_node);
  }
  if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
    // Report the size of the vector that was actually checked (the GradD outputs), not the input count.
    MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_d_outputs has wrong inputs size, should be "
                      << kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perchannel_grad_d_outputs.size()
                      << trace::DumpSourceLines(lsq_perchannel_grad_node);
  }
  // Primitive first, then the second output of the GradD node as the only data input.
  std::vector<AnfNodePtr> lsq_perchannel_reduce_grad_inputs = {
    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDReduceOpName)),
    lsq_perchannel_grad_d_outputs[kIndex1]};
  auto lsq_perchannel_reduce_grad = NewCNode(lsq_perchannel_reduce_grad_inputs, graph);
  MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad);
  lsq_perchannel_reduce_grad->set_scope(lsq_perchannel_grad_node->scope());
  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 1UL)};
  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 1UL)};
  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_reduce_grad.get());
  common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_reduce_grad);
  lsq_perchannel_reduce_grad_outputs->push_back(lsq_perchannel_reduce_grad);
}
/// Pattern matched by this pass: the FakeLearnedScaleQuantPerLayerGrad primitive followed by a
/// sequence variable standing for its inputs.
const BaseRef FakeLearnedScaleQuantPerLayerGradUnifyMindIR::DefinePattern() const {
  VarPtr input_seq = std::make_shared<SeqVar>();
  auto grad_prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradOpName);
  return VectorRef({grad_prim, input_seq});
}
/// Replace a matched per-layer grad node by MakeTuple(GradD output 0, GradDReduce output).
///
/// \param func_graph Graph being rewritten; must not be null.
/// \param node       Matched node; must be a non-null CNode that carries a primitive.
/// \return The new MakeTuple node.
const AnfNodePtr FakeLearnedScaleQuantPerLayerGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
                                                                       const AnfNodePtr &node,
                                                                       const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(func_graph);
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
  MS_EXCEPTION_IF_NULL(primitive);

  // Stage 1: the GradD node and its split outputs.
  std::vector<AnfNodePtr> grad_d_results;
  CreateOutputsOfLSQPerLayerGradD(func_graph, cnode, &grad_d_results);
  const auto grad_d_count = grad_d_results.size();
  if (grad_d_count != kFakeLearnedScaleQuantGradOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_grad_d_outputs has wrong inputs size, should be "
                      << kFakeLearnedScaleQuantGradOutputNum << ", but got " << grad_d_count
                      << trace::DumpSourceLines(node);
  }

  // Stage 2: the reduce node fed by the GradD outputs.
  std::vector<AnfNodePtr> reduce_results;
  CreateOutputsOfLSQPerLayerReduceGrad(func_graph, cnode, grad_d_results, &reduce_results);
  const auto reduce_count = reduce_results.size();
  if (reduce_count != kSingleOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_reduce_grad_outputs has wrong inputs size, should be "
                      << kSingleOutputNum << ", but got " << reduce_count << trace::DumpSourceLines(node);
  }

  // Stage 3: bundle the first result of each stage into one tuple.
  std::vector<AnfNodePtr> tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), grad_d_results[0],
                                          reduce_results[0]};
  return func_graph->NewCNode(tuple_inputs);
}
/// Pattern matched by this pass: the FakeLearnedScaleQuantPerChannelGrad primitive followed by a
/// sequence variable standing for its inputs.
const BaseRef FakeLearnedScaleQuantPerChannelGradUnifyMindIR::DefinePattern() const {
  VarPtr input_seq = std::make_shared<SeqVar>();
  auto grad_prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradOpName);
  return VectorRef({grad_prim, input_seq});
}
/// Replace a matched per-channel grad node by MakeTuple(GradD output 0, GradDReduce output).
///
/// \param func_graph Graph being rewritten; must not be null.
/// \param node       Matched node; must be a non-null CNode that carries a primitive.
/// \return The new MakeTuple node.
const AnfNodePtr FakeLearnedScaleQuantPerChannelGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
                                                                         const AnfNodePtr &node,
                                                                         const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(func_graph);
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
  MS_EXCEPTION_IF_NULL(primitive);

  // Stage 1: the GradD node and its split outputs.
  std::vector<AnfNodePtr> grad_d_results;
  CreateOutputsOfLSQPerChannelGradD(func_graph, cnode, &grad_d_results);
  const auto grad_d_count = grad_d_results.size();
  if (grad_d_count != kFakeLearnedScaleQuantGradOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_grad_d_outputs has wrong inputs size, should be "
                      << kFakeLearnedScaleQuantGradOutputNum << ", but got " << grad_d_count
                      << trace::DumpSourceLines(node);
  }

  // Stage 2: the reduce node fed by the GradD outputs.
  std::vector<AnfNodePtr> reduce_results;
  CreateOutputsOfLSQPerChannelReduceGrad(func_graph, cnode, grad_d_results, &reduce_results);
  const auto reduce_count = reduce_results.size();
  if (reduce_count != kSingleOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_reduce_grad_outputs has wrong inputs size, should be "
                      << kSingleOutputNum << ", but got " << reduce_count << trace::DumpSourceLines(node);
  }

  // Stage 3: bundle the first result of each stage into one tuple.
  std::vector<AnfNodePtr> tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), grad_d_results[0],
                                          reduce_results[0]};
  return func_graph->NewCNode(tuple_inputs);
}
} // namespace opt
} // namespace mindspore

View File

@ -1,72 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
#include <vector>
#include "backend/common/optimizer/optimizer.h"
#include "backend/common/optimizer/helper.h"
namespace mindspore {
namespace opt {
// Expected number of entries produced for a grad node by the unify passes.
constexpr size_t kFakeLearnedScaleQuantGradOutputNum = 2;
// Minimum size of a grad node's inputs() vector (entry 0 plus the four inputs that are forwarded).
constexpr size_t kFakeLearnedScaleQuantGradInputNum = 5;
// Number of outputs of the generated GradD nodes.
constexpr size_t kFakeLearnedScaleQuantGradDOutputNum = 2;
// Primitive names: the original per-layer grad op and the two ops it is rewritten into.
constexpr auto kFakeLearnedScaleQuantPerLayerGradOpName = "FakeLearnedScaleQuantPerLayerGrad";
constexpr auto kFakeLearnedScaleQuantPerLayerGradDOpName = "FakeLearnedScaleQuantPerLayerGradD";
constexpr auto kFakeLearnedScaleQuantPerLayerGradDReduceOpName = "FakeLearnedScaleQuantPerLayerGradDReduce";
// Primitive names: the original per-channel grad op and the two ops it is rewritten into.
constexpr auto kFakeLearnedScaleQuantPerChannelGradOpName = "FakeLearnedScaleQuantPerChannelGrad";
constexpr auto kFakeLearnedScaleQuantPerChannelGradDOpName = "FakeLearnedScaleQuantPerChannelGradD";
constexpr auto kFakeLearnedScaleQuantPerChannelGradDReduceOpName = "FakeLearnedScaleQuantPerChannelGradDReduce";
// Names of the node attributes copied from the original grad node onto the generated nodes.
constexpr auto kAttrNeg_trunc = "neg_trunc";
constexpr auto kAttrChannelAxis = "channel_axis";
// Pattern pass that matches FakeLearnedScaleQuantPerLayerGrad nodes and rewrites each one into a
// FakeLearnedScaleQuantPerLayerGradD node plus a FakeLearnedScaleQuantPerLayerGradDReduce node.
class FakeLearnedScaleQuantPerLayerGradUnifyMindIR : public PatternProcessPass {
public:
// `multigraph` is forwarded unchanged to PatternProcessPass together with the pass name.
explicit FakeLearnedScaleQuantPerLayerGradUnifyMindIR(bool multigraph = true)
: PatternProcessPass("fake_learned_scale_quant_perlayer_grad_unify_mindir", multigraph) {}
~FakeLearnedScaleQuantPerLayerGradUnifyMindIR() override = default;
// Returns the pattern: the per-layer grad primitive followed by a sequence of inputs.
const BaseRef DefinePattern() const override;
// Builds the replacement for a matched node and returns it.
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
private:
// Creates the GradD node from the original grad node and collects its outputs.
void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const;
// Creates the GradDReduce node from the GradD outputs and appends it to the output vector.
void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const;
};
// Pattern pass that matches FakeLearnedScaleQuantPerChannelGrad nodes and rewrites each one into a
// FakeLearnedScaleQuantPerChannelGradD node plus a FakeLearnedScaleQuantPerChannelGradDReduce node.
class FakeLearnedScaleQuantPerChannelGradUnifyMindIR : public PatternProcessPass {
public:
// `multigraph` is forwarded unchanged to PatternProcessPass together with the pass name.
explicit FakeLearnedScaleQuantPerChannelGradUnifyMindIR(bool multigraph = true)
: PatternProcessPass("fake_learned_scale_quant_perchannel_grad_unify_mindir", multigraph) {}
~FakeLearnedScaleQuantPerChannelGradUnifyMindIR() override = default;
// Returns the pattern: the per-channel grad primitive followed by a sequence of inputs.
const BaseRef DefinePattern() const override;
// Builds the replacement for a matched node and returns it.
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
private:
// Creates the GradD node from the original grad node and collects its outputs.
void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const;
// Creates the GradDReduce node from the GradD outputs and appends it to the output vector.
void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_

View File

@ -220,10 +220,6 @@ constexpr const char kNameXlogy[] = "Xlogy";
constexpr const char kNameReLUV2[] = "ReLUV2"; constexpr const char kNameReLUV2[] = "ReLUV2";
constexpr const char kNameAccumulateNV2[] = "AccumulateNV2"; constexpr const char kNameAccumulateNV2[] = "AccumulateNV2";
constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad"; constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad";
constexpr const char kNameFakeQuantWithMinMaxVars[] = "FakeQuantWithMinMaxVars";
constexpr const char kNameFakeQuantWithMinMaxVarsGradient[] = "FakeQuantWithMinMaxVarsGradient";
constexpr const char kNameFakeQuantWithMinMaxVarsPerChannel[] = "FakeQuantWithMinMaxVarsPerChannel";
constexpr const char kNameFakeQuantWithMinMaxVarsPerChannelGradient[] = "FakeQuantWithMinMaxVarsPerChannelGradient";
constexpr const char kNameActsULQ[] = "ActsULQ"; constexpr const char kNameActsULQ[] = "ActsULQ";
constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad"; constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad";
constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad"; constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad";

View File

@ -56,41 +56,6 @@ ATTR_MAP(ConfusionMulGrad) = {{"axes", ATTR_DESC(axes, AnyTraits<std::vector<int
OUTPUT_MAP(ConfusionMulGrad) = {{0, OUTPUT_DESC(output0)}, {1, OUTPUT_DESC(output1)}}; OUTPUT_MAP(ConfusionMulGrad) = {{0, OUTPUT_DESC(output0)}, {1, OUTPUT_DESC(output1)}};
REG_ADPT_DESC(ConfusionMulGrad, kNameConfusionMulGrad, ADPT_DESC(ConfusionMulGrad)) REG_ADPT_DESC(ConfusionMulGrad, kNameConfusionMulGrad, ADPT_DESC(ConfusionMulGrad))
// FakeQuantWithMinMaxVars
// Inputs 1..3 -> x, min, max; attributes num_bits (int64_t) and narrow_range (bool); output 0 -> y.
INPUT_MAP(FakeQuantWithMinMaxVars) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
ATTR_MAP(FakeQuantWithMinMaxVars) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
OUTPUT_MAP(FakeQuantWithMinMaxVars) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(FakeQuantWithMinMaxVars, kNameFakeQuantWithMinMaxVars, ADPT_DESC(FakeQuantWithMinMaxVars))
// FakeQuantWithMinMaxVarsGradient
// Inputs 1..4 -> gradients, x, min, max; same two attributes; outputs 0..2 -> backprops w.r.t. x, min, max.
INPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
ATTR_MAP(FakeQuantWithMinMaxVarsGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
OUTPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
{0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
REG_ADPT_DESC(FakeQuantWithMinMaxVarsGradient, kNameFakeQuantWithMinMaxVarsGradient,
ADPT_DESC(FakeQuantWithMinMaxVarsGradient))
// FakeQuantWithMinMaxVarsPerChannel
// Same input/attribute/output layout as FakeQuantWithMinMaxVars, registered under its own name.
INPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
ATTR_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel, kNameFakeQuantWithMinMaxVarsPerChannel,
ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel))
// FakeQuantWithMinMaxVarsPerChannelGradient
// Same input/attribute/output layout as FakeQuantWithMinMaxVarsGradient, registered under its own name.
INPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
ATTR_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
{0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient, kNameFakeQuantWithMinMaxVarsPerChannelGradient,
ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient))
// GreaterEqual // GreaterEqual
INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP; ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP;

View File

@ -32,18 +32,6 @@ DECLARE_OP_USE_OUTPUT(AccumulateNV2)
DECLARE_OP_ADAPTER(ConfusionMulGrad) DECLARE_OP_ADAPTER(ConfusionMulGrad)
DECLARE_OP_USE_OUTPUT(ConfusionMulGrad) DECLARE_OP_USE_OUTPUT(ConfusionMulGrad)
// Adapter and output-usage declarations for the four FakeQuantWithMinMaxVars* operators
// (forward, gradient, per-channel forward, per-channel gradient). Each operator gets one
// DECLARE_OP_ADAPTER and one DECLARE_OP_USE_OUTPUT entry.
// NOTE(review): presumably these pair with INPUT_MAP/ATTR_MAP/OUTPUT_MAP definitions in the
// corresponding source file — confirm before removing or reordering.
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVars)
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVars)
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsGradient)
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsGradient)
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannel)
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannel)
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannelGradient)
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannelGradient)
DECLARE_OP_ADAPTER(GreaterEqual) DECLARE_OP_ADAPTER(GreaterEqual)
DECLARE_OP_USE_OUTPUT(GreaterEqual) DECLARE_OP_USE_OUTPUT(GreaterEqual)

View File

@ -1509,12 +1509,6 @@ class _CellGraphExecutor:
""" """
self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key) self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key)
def fetch_info_for_quant_export(self, exec_id):
    """
    Fetch the quant-export information of a graph from the underlying graph executor.

    Args:
        exec_id: Identifier handed to the graph executor to select the graph.

    Returns:
        None when the executor's `has_compiled(exec_id)` returns exactly ``False``;
        otherwise the result of the executor's `fetch_info_for_quant_export(exec_id)`.
    """
    executor = self._graph_executor
    compiled = executor.has_compiled(exec_id)
    # Identity comparison on purpose: only a literal False short-circuits the lookup.
    return None if compiled is False else executor.fetch_info_for_quant_export(exec_id)
def ms_memory_recycle(): def ms_memory_recycle():
""" """

View File

@ -1,4 +0,0 @@
approvers:
- zhang_xue_tong
- jpc_chenjianping
- hangangqiang

View File

@ -1,19 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
MindSpore compression module.
Note: This is an experimental interface that is subject to change and/or deletion.
"""

View File

@ -1,24 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Common module for various compression algorithms, now only including datatype definition for quantization.
Note: This is an experimental interface that is subject to change and/or deletion.
"""
from __future__ import absolute_import
from mindspore.compression.common.constant import QuantDtype
__all__ = ["QuantDtype"]

View File

@ -1,124 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Note:
Constant module for compression. This is an interface that is subject to change or deletion.
"""
from __future__ import absolute_import
import enum
import re
from types import DynamicClassAttribute
__all__ = ["QuantDtype"]
@enum.unique
class QuantDtype(enum.Enum):
    """
    Enumeration of quantization data types: signed `INT2` ~ `INT8` and unsigned `UINT2` ~ `UINT8`.

    The value of every member is its own name as a string; the bit width is parsed from the
    digits contained in that string.
    """
    INT2 = "INT2"
    INT3 = "INT3"
    INT4 = "INT4"
    INT5 = "INT5"
    INT6 = "INT6"
    INT7 = "INT7"
    INT8 = "INT8"

    UINT2 = "UINT2"
    UINT3 = "UINT3"
    UINT4 = "UINT4"
    UINT5 = "UINT5"
    UINT6 = "UINT6"
    UINT7 = "UINT7"
    UINT8 = "UINT8"

    def __str__(self):
        """Return the bare member name, e.g. ``INT8``."""
        return str(self.name)

    @staticmethod
    def is_signed(dtype):
        """
        Tell whether a quant datatype is one of the signed members.

        Args:
            dtype (QuantDtype): quant datatype.

        Returns:
            bool, True for `INT2` ~ `INT8`, False for anything else.

        Examples:
            >>> quant_dtype = QuantDtype.INT8
            >>> is_signed = QuantDtype.is_signed(quant_dtype)
        """
        signed_members = (QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5,
                          QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8)
        return dtype in signed_members

    @staticmethod
    def switch_signed(dtype):
        """
        Map a quant datatype to the member with the same bit width and the opposite signedness.

        Args:
            dtype (QuantDtype): quant datatype.

        Returns:
            QuantDtype, the counterpart of `dtype`; None when `dtype` is not a known member.

        Examples:
            >>> quant_dtype = QuantDtype.INT8
            >>> quant_dtype = QuantDtype.switch_signed(quant_dtype)
        """
        signed_members = (QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5,
                          QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8)
        unsigned_members = (QuantDtype.UINT2, QuantDtype.UINT3, QuantDtype.UINT4, QuantDtype.UINT5,
                            QuantDtype.UINT6, QuantDtype.UINT7, QuantDtype.UINT8)
        # Both tuples are ordered by bit width, so zipping pairs INTn with UINTn.
        counterpart = dict(zip(signed_members, unsigned_members))
        counterpart.update(zip(unsigned_members, signed_members))
        return counterpart.get(dtype)

    @DynamicClassAttribute
    def _value(self):
        """The bit width parsed from the first run of digits in the member's string value."""
        digits = re.search(r"(\d+)", self._value_)
        return int(digits.group(1))

    @DynamicClassAttribute
    def num_bits(self):
        """
        Get the num bits of the QuantDtype member.

        Returns:
            int, the num bits of the QuantDtype member.

        Examples:
            >>> from mindspore.compression.common import QuantDtype
            >>> quant_dtype = QuantDtype.INT8
            >>> num_bits = quant_dtype.num_bits
            >>> print(num_bits)
            8
        """
        return self._value

View File

@ -1,19 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Compression export module.
Note: This is an experimental interface that is subject to change and/or deletion.
"""

View File

@ -1,515 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Note:
Export for quantization. This is an interface that is subject to change or deletion.
"""
from __future__ import absolute_import
import copy
import numpy as np
from mindspore import log as logger
from mindspore import nn, ops
from mindspore._checkparam import Validator
from mindspore.common import Tensor
from mindspore.common import dtype as mstype
from mindspore.common.api import _cell_graph_executor as _executor
from mindspore.common.parameter import Parameter
from mindspore.nn import Cell
from mindspore.nn.layer import quant
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.ops.operations import _inner_ops as inner
from mindspore.compression.quant import quant_utils
from mindspore.compression.quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell
__all__ = ["ExportToQuantInferNetwork"]
class QuantBlock(Cell):
    """
    Deploy-only block that wraps a Conv/Dense core op between a quantize and a dequantize step.

    The input is quantized with `quant_op`, run through `core_op` together with the weight,
    optionally bias-added, dequantized with `dequant_op` and `dequant_scale`, cast to float32
    and finally passed through `activation` when one is given.

    Notes:
        This block is only for deploy, and not trainable.

    Args:
        core_op: Operation invoked as `core_op(x, weight)`.
        weight: Weight handed to `core_op`.
        quant_op: Operation applied to the input before `core_op`.
        dequant_op: Operation invoked as `dequant_op(x, dequant_scale)`.
        dequant_scale: Scale handed to `dequant_op`.
        bias: Optional bias added with `BiasAdd` right after `core_op`. Default: None.
        activation: Optional callable applied to the float32 result. Default: None.

    Inputs:
        - **x** (Tensor) - Input of the block.

    Outputs:
        Tensor, the float32 result (after the activation, if any).
    """

    def __init__(self,
                 core_op,
                 weight,
                 quant_op,
                 dequant_op,
                 dequant_scale,
                 bias=None,
                 activation=None):
        super().__init__()
        self.core_op = core_op
        self.weight = weight
        self.quant = quant_op
        self.dequant = dequant_op
        self.dequant_scale = dequant_scale
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()
        self.sub = P.Sub()
        # Single int8 zero that is subtracted from the weight on the bias path.
        self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')

    def construct(self, x):
        """Quantize `x`, apply the core op (plus bias), dequantize, cast and activate."""
        out = self.quant(x)
        if self.has_bias:
            # The weight offset is only subtracted when a bias is present.
            shifted_weight = self.sub(self.weight, self.weight_offset)
            out = self.core_op(out, shifted_weight)
            out = self.bias_add(out, self.bias)
        else:
            out = self.core_op(out, self.weight)
        out = self.dequant(out, self.dequant_scale)
        out = F.cast(out, mstype.float32)
        if self.has_act:
            out = self.activation(out)
        return out

    def extend_repr(self):
        """Return the comma-separated summary of the block's components."""
        parts = [f'quant={self.quant}',
                 f'core_op={type(self.core_op)}',
                 f'weight=shape[{self.weight.shape}]']
        if self.has_bias:
            parts.append(f'bias=shape[{self.bias.shape}]')
        if self.has_act:
            parts.append(f'activation={self.activation}')
        parts.append(f'dequant={self.dequant}')
        return ', '.join(parts)
class QuantMindirBlock(Cell):
    """A quant binary block of Conv/Dense, activation layer for export MINDIR model.

    The constructor attaches the quantization information found in `param_dict` to `core_op`
    as primitive attributes; `construct` then runs the core op, the optional bias add and the
    optional activation.

    Args:
        core_op (Cell): The operation cell.
        weight (Tensor): The weight of the cell.
        bias (Tensor): The bias of the cell. Default: None.
        activation: Optional activation applied to the output; its class name is recorded on
            `core_op` as "activation_name". Default: None.
        param_dict (dict): The information of the cell. Although the default is None, the
            constructor indexes it unconditionally, so a dict must be supplied.
    """

    def __init__(self,
                 core_op,
                 weight,
                 bias=None,
                 activation=None,
                 param_dict=None):
        super().__init__()
        self.core_op = core_op
        add_attr = self.core_op.add_prim_attr
        if activation is not None:
            add_attr("activation_name", activation.__class__.__name__)
        for key in ("filter_maxq", "filter_minq"):
            add_attr(key, Tensor(param_dict[key]))
        if param_dict["output_maxq"] is not None:
            for key in ("output_maxq", "output_minq"):
                add_attr(key, Tensor(param_dict[key]))
        add_attr("symmetric", Tensor(param_dict["symmetric"]))
        if hasattr(core_op, 'pad_mode'):
            add_attr("pad_mode", core_op.pad_mode)
        # The activation bit width is fixed to 8 here.
        add_attr("act_num_bits", Tensor(8))
        for key in ("weight_num_bits", "weight_narrow_range"):
            add_attr(key, Tensor(param_dict[key]))
        for key in ("input_narrow_range", "output_narrow_range"):
            if param_dict[key] is not None:
                add_attr(key, Tensor(param_dict[key]))
        input_maxq = param_dict["input_maxq"]
        if input_maxq == 'None':
            # The string 'None' is a sentinel: record mean/std_dev instead of an input range.
            for key in ("mean", "std_dev"):
                add_attr(key, Tensor(param_dict[key]))
        elif input_maxq is not None:
            for key in ("input_maxq", "input_minq"):
                add_attr(key, Tensor(param_dict[key]))

        self.weight = weight
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        """Apply the core op, then the bias add and the activation when they are present."""
        out = self.core_op(x, self.weight)
        if self.has_bias:
            out = self.bias_add(out, self.bias)
        if self.has_act:
            out = self.activation(out)
        return out

    def extend_repr(self):
        """Return the comma-separated summary of the block's components."""
        parts = [f'core_op={type(self.core_op)}', f'weight=shape[{self.weight.shape}]']
        if self.has_bias:
            parts.append(f'bias=shape[{self.bias.shape}]')
        if self.has_act:
            parts.append(f'activation={self.activation}')
        return ', '.join(parts)
class ExportToQuantInferNetwork:
    """
    Convert quantization aware network to infer network.

    Args:
        network (Cell): MindSpore quantization aware training network.
        mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of
            network. Required (the signature declares no default).
        std_dev (int, float): The variance of input data after preprocessing, used for quantizing the first layer
            of network. Required (the signature declares no default).
        inputs (Tensor): Input tensors of the `quantization aware training network`, passed positionally
            after `std_dev`.
        is_mindir (bool): Whether export MINDIR format. Keyword-only. Default: False.

    Returns:
        Cell, Infer network (returned by :meth:`run`).
    """

    def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
        network = Validator.check_isinstance('network', network, (nn.Cell,))
        self.data_type = mstype.int8
        # Two independent copies: `network` is converted in place by `run`, while `network_bk`
        # is only searched for fake-quant cells in `_get_input_quant_param`.
        self.network = copy.deepcopy(network)
        self.network_bk = copy.deepcopy(network)
        self.get_inputs_table(inputs)
        self.mean = mean
        self.std_dev = std_dev
        self.is_mindir = is_mindir
        # Most recently created deploy block; reset to None whenever it no longer applies.
        self.upcell = None

    @staticmethod
    def __get_dequant_scale(scale_a_in, scale_w):
        """
        Get dequant scale.

        Multiplies the activation scale by the weight scale, reinterprets the float32 bit
        pattern of every element as uint32 and stores it in a uint64 tensor.
        """
        scale_deq = scale_a_in * scale_w

        # fuse parameter
        # |--------|47:40|--------|39:32|--------|31:0|
        #         offset_w [8]    shift_N [8]    deq_scale [32]
        float32_deq_scale = scale_deq.astype(np.float32)
        uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
        scale_length = scale_deq.size  # channel
        dequant_param = np.zeros(scale_length, dtype=np.uint64)
        for index in range(scale_length):
            dequant_param[index] += uint32_deq_scale[index]
        scale_deq = Tensor(dequant_param, mstype.uint64)
        return scale_deq

    def get_inputs_table(self, inputs):
        """Get the input quantization parameters of quantization cell for quant export."""
        phase_name = 'export_quant'
        graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
        self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

    def run(self):
        """Start to convert and return the converted network."""
        logger.warning("The compression module is deprecated and may not be supported in later version, please use "
                       "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
        self.network.update_cell_prefix()
        network = self.network
        if isinstance(network, _AddFakeQuantInput):
            # Unwrap the input fake-quant wrapper and convert the wrapped network only.
            network = network.network
        network = self._convert_quant2deploy(network)
        return network

    def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
        """Convert network's quant subcell to deploy subcell."""
        scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)

        # Build the `Quant` `Dequant` op.
        # Quant only support perlayer version. Need check here.
        if float(scale_a_in) == 0:
            raise ValueError("If `scale_a_in` is zero, will lead to zero error.")
        quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
        scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
        dequant_op = inner.Dequant()

        if isinstance(activation, _AddFakeQuantAfterSubCell):
            activation = activation.subcell
        elif hasattr(activation, "get_origin"):
            activation = activation.get_origin()

        # get op
        if isinstance(cell_core, quant.DenseQuant):
            op_core = P.MatMul()
        else:
            op_core = cell_core.conv

        # get the `weight` and `bias`
        weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)

        if self.is_mindir:
            block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
        else:
            block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
        return block

    def _get_input_quant_param(self, minq_name, np_type, param_dict):
        """
        Get input quant parameter for quant block.

        Looks up, in the backup network, the first cell whose name ends with `minq_name` minus
        its last five characters, and fills the input entries of `param_dict` from it.

        Raises:
            ValueError: If no cell of the backup network matches.
        """
        # Drops the last five characters (presumably the '.minq' suffix - TODO confirm).
        fake_quant_a_in_prefix = minq_name[:-5]
        fake_quant_a_in = None
        for cell_name, cell in self.network_bk.cells_and_names():
            if cell_name.endswith(fake_quant_a_in_prefix):
                fake_quant_a_in = cell
                break
        if fake_quant_a_in is None:
            # Fail with a clear message instead of an UnboundLocalError further down.
            raise ValueError(f"Can not find the input fake quant cell whose name ends with "
                             f"'{fake_quant_a_in_prefix}' (derived from '{minq_name}').")
        scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
        param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
        return scale_a_in, zp_a_in

    def __get_quant_param(self, cell_core, fake_quant_a_out):
        """
        Get parameter for quant block.

        Returns:
            tuple, `(scale_a_in, zp_a_in, scale_w, zp_w, param_dict)`. `scale_a_in`/`zp_a_in` are
            plain Python numbers when the layer reads the network input or is not found in the
            quant info table.
        """
        w_minq_name = cell_core.fake_quant_weight.minq.name
        w_maxq_name = cell_core.fake_quant_weight.maxq.name
        np_type = mstype.dtype_to_nptype(self.data_type)
        param_dict = {
            "filter_maxq": None,
            "filter_minq": None,
            "output_maxq": None,
            "output_minq": None,
            "input_maxq": None,
            "input_minq": None,
            "input_narrow_range": None,
            "output_narrow_range": None,
            "weight_narrow_range": cell_core.fake_quant_weight.narrow_range,
            "mean": self.mean,
            "std_dev": self.std_dev,
            "symmetric": cell_core.fake_quant_weight.symmetric,
            "weight_num_bits": cell_core.fake_quant_weight.num_bits,
        }

        scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
        if fake_quant_a_out is not None:
            _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
                quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
            param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range

        # The table is keyed by the weight's minq name, with the maxq name as fallback.
        info = self.quant_info_table.get(w_minq_name, None)
        if not info:
            info = self.quant_info_table.get(w_maxq_name, None)
        if info:
            _, minq_name = info
            if minq_name == 'input':
                # First layer: derive scale/zero-point from mean/std_dev; the string 'None'
                # marks the input range as "described by mean/std_dev".
                scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
                    (1 / self.std_dev), round(self.mean), 'None', 'None'
            else:
                scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
        else:
            # skip quant layer
            scale_a_in, zp_a_in = 1.0, 0.0
        return scale_a_in, zp_a_in, scale_w, zp_w, param_dict

    def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
        """
        Get weight and bias for quantization.

        Returns:
            tuple, `(weight, bias, weight_b, bias_b)`: the quantized weight tensor, the int32 bias
            tensor (or None), the unquantized weight tensor and the float32 bias tensor (or None).

        Raises:
            ValueError: If a bias exists and `scale_a_in` or `scale_w` contains zero.
        """
        np_type = mstype.dtype_to_nptype(self.data_type)
        weight = cell_core.weight.data.asnumpy()
        bias = None
        if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
            if cell_core.has_bias:
                bias = cell_core.bias.data.asnumpy()
        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
            weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
        elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
            weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
        # Keep the float versions for the MINDIR block before quantizing.
        weight_b = weight
        bias_b = bias
        # apply the quant
        quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
                                                             cell_core.fake_quant_weight.num_bits,
                                                             cell_core.fake_quant_weight.narrow_range)
        weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
        if bias is not None:
            # `scale_a_in` may be a plain Python float (first layer / skipped layer), for which
            # `0 in scale_a_in` raises TypeError; compare through numpy so scalars work too.
            if np.any(np.asarray(scale_a_in) == 0):
                raise ValueError("Zero exist in `scale_a_in` which will lead to divide zero error.")
            if np.any(np.asarray(scale_w) == 0):
                raise ValueError("Zero exist in `scale_w` which will lead to divide zero error.")
            bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

        if isinstance(cell_core, quant.DenseQuant):
            weight = np.transpose(weight)
            weight_b = np.transpose(weight_b)

        weight_tensor = Tensor(weight, self.data_type)
        weight_b_tensor = Tensor(weight_b)
        if bias_b is not None:
            bias_b_tensor = Tensor(bias_b, mstype.float32)
            return weight_tensor, bias, weight_b_tensor, bias_b_tensor
        return weight_tensor, bias, weight_b_tensor, None

    def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
        """Add output quant info for quant op for export mindir."""
        if self.is_mindir:
            # Only primitives that do not carry an output range yet are annotated.
            if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'):
                np_type = mstype.dtype_to_nptype(self.data_type)
                _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
                origin_op.add_prim_attr('output_maxq', Tensor(maxq))
                origin_op.add_prim_attr('output_minq', Tensor(minq))

    def _convert_subcell(self, network, change, name, subcell):
        """Replace a bare quant conv/dense subcell by its deploy block and remember it as `upcell`."""
        if subcell is not None and hasattr(subcell, "fake_quant_weight"):
            new_subcell = self._get_quant_block(subcell, None, None)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            self.upcell = new_subcell
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_conv(self, network, change, name, subcell):
        """Replace a conv + activation subcell by its deploy block."""
        cell_core = subcell.conv
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            self.upcell = None
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_dense(self, network, change, name, subcell):
        """Replace a dense + activation subcell by its deploy block."""
        cell_core = subcell.dense
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            prefix = subcell.param_prefix
            new_subcell.update_parameters_name(prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            self.upcell = None
            change = True
        return network, change

    def _convert_act(self, subcell):
        """Return the original activation of a quant activation subcell, annotating output ranges."""
        activation = subcell.get_origin()
        if isinstance(activation, nn.ReLU):
            self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
        elif isinstance(activation, nn.ReLU6):
            self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
        return activation

    def _convert_add(self, subcell):
        """Strip the fake-quant parts of an add subcell, annotating the add op's output range."""
        if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
            add_op = subcell.add.subcell
            subcell.__delattr__("add")
            subcell.__setattr__("add", add_op)
        add_op = subcell.add
        self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
        subcell.__delattr__("fake_quant_act")
        subcell.__setattr__("fake_quant_act", P.identity())

    def _convert_observer(self, network, name, subcell):
        """Replace a FakeQuantWithMinMaxObserver child by an identity op."""
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell)
        network.__delattr__(name)
        network.__setattr__(name, P.identity())

    def _convert_fake_quant_after_cell(self, network, name, subcell):
        """Replace an _AddFakeQuantAfterSubCell child by the op it wraps."""
        op = subcell.subcell
        self._add_output_min_max_for_op(op, subcell.fake_quant_act)
        network.__delattr__(name)
        network.__setattr__(name, op)

    def _convert_core_quant_subcell(self, network, change, name, subcell):
        """
        Convert conv and dense style subcells.

        Returns:
            tuple, `(is_core_subcell, network, change)`; the first item is False when `subcell`
            is none of the handled types.
        """
        is_core_subcell = True
        if isinstance(subcell, nn.Conv2dBnAct):
            network, change = self._convert_conv(network, change, name, subcell)
        elif isinstance(subcell, nn.DenseBnAct):
            network, change = self._convert_dense(network, change, name, subcell)
        elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
                                  quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
            network, change = self._convert_subcell(network, change, name, subcell)
        else:
            is_core_subcell = False
        return is_core_subcell, network, change

    def _convert_other_quant_subcell(self, network, change, name, subcell):
        """
        Convert quant subcells other than conv and dense.

        Returns:
            tuple, `(is_other_subcell, network, change)`; the first item is False when `subcell`
            is none of the handled types.
        """
        is_other_subcell = True
        if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
            activation = self._convert_act(subcell)
            network.insert_child_to_cell(name, activation)
            change = True
        elif isinstance(subcell, nn.TensorAddQuant):
            self._convert_add(subcell)
        elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
            self._convert_observer(network, name, subcell)
        elif isinstance(subcell, _AddFakeQuantAfterSubCell):
            self._convert_fake_quant_after_cell(network, name, subcell)
            change = True
        else:
            is_other_subcell = False
        return is_other_subcell, network, change

    def _convert_quant2deploy(self, network):
        """Convert network's all quant subcell to deploy subcell."""
        cells = network.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == network:
                continue
            is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
            is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
            if not is_core_quant_subcell and not is_other_quant_subcell:
                # Not a quant cell itself: forget the previous block and descend into it.
                self.upcell = None
                self._convert_quant2deploy(subcell)
        if isinstance(network, nn.SequentialCell) and change:
            # Refresh the cached cell list after children were replaced.
            network.cell_list = list(network.cells())
        return network

View File

@ -1,28 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Quantization module, including base class of the quantizer, the quantization aware training algorithm,
and quantization utils.
Note: This is an experimental interface that is subject to change and/or deletion.
"""
from __future__ import absolute_import
from .quantizer import OptimizeOption
from .qat import QuantizationAwareTraining, create_quant_config
from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers", "QuantizationAwareTraining",
"create_quant_config", "OptimizeOption"]

View File

@ -1,634 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Quantization aware training
User can use quantization aware to train a model. MindSpore supports quantization aware training,
which models quantization errors in both the forward and backward passes using fake-quantization
operations. Note that the entire computation is carried out in floating point. At the end of quantization
aware training, MindSpore provides conversion functions to convert the trained model into lower precision.
Note: This is an experimental interface that is subject to change and/or deletion.
"""
from __future__ import absolute_import
import re
import numpy as np
import mindspore.context as context
from mindspore import log as logger
from mindspore import nn, ops
from mindspore._checkparam import Validator, Rel
from mindspore.nn.layer import quant
from mindspore.ops import functional as F
from ..common import QuantDtype
from .quantizer import Quantizer, OptimizeOption
from .quant_utils import compute_kl_threshold
__all__ = ["QuantizationAwareTraining", "create_quant_config"]
def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver),
                        quant_delay=(0, 0),
                        quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
                        per_channel=(False, False),
                        symmetric=(False, False),
                        narrow_range=(False, False),
                        mode="DEFAULT"):
    r"""
    Build the quantization config holding one observer for weights and one for data flow.

    Every sequence argument is read at index 0 for the weights and at index -1 for the data flow.

    Args:
        quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element
            applies to weights and the second applies to data flow. Currently, only
            :class:`FakeQuantWithMinMaxObserver` supported.
            Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
        quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
            during train and eval. The first element represents weights and the second element represents data flow.
            Default: (0, 0).
        quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
            element represents weights and the second element represents data flow.
            Default: (QuantDtype.INT8, QuantDtype.INT8).
        per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
            then base on per channel, otherwise base on per layer. The first element represents weights
            and the second element represents data flow, and the second element must be `False` now.
            Default: (False, False).
        symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
            base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
            element represents data flow. Default: (False, False).
        narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
            The first element represents weights and the second element represents data flow.
            Default: (False, False).
        mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
            Default: "DEFAULT".

    Returns:
        QuantConfig, contains the observer type of weight and activation.

    Raises:
        ValueError: If the second element of `per_channel` is not `False`.
    """
    if per_channel[-1]:
        raise ValueError("Arg 'per_channel' second element must be 'False'.")

    def _observer_at(position):
        """Partially initialise the observer stored at `position` with the settings at that position."""
        return quant_observer[position].partial_init(quant_delay=quant_delay[position],
                                                     quant_dtype=quant_dtype[position],
                                                     per_channel=per_channel[position],
                                                     symmetric=symmetric[position],
                                                     narrow_range=narrow_range[position],
                                                     mode=mode)

    weight_observer = _observer_at(0)
    act_observer = _observer_at(-1)
    return quant.QuantConfig(weight=weight_observer, activation=act_observer)
class _AddFakeQuantInput(nn.Cell):
    """
    Wrap a network so that its single input passes through a FakeQuant observer first.

    Args:
        network (Cell): The network to wrap. Only the single-input case is supported.
        quant_delay (int): Forwarded to the input observer as `quant_delay`. Default: 0.
    """

    def __init__(self, network, quant_delay=0):
        super().__init__(auto_prefix=False)
        # Observer placed in front of the wrapped network, initialised to the range [-6, 6].
        self.fake_quant_input = quant.FakeQuantWithMinMaxObserver(
            min_init=-6,
            max_init=6,
            quant_delay=quant_delay,
            ema=True,
        )
        self.fake_quant_input.update_parameters_name('fake_quant_input.')
        self.network = network

    def construct(self, data):
        """Fake-quantize `data`, then run the wrapped network on the result."""
        return self.network(self.fake_quant_input(data))
class _AddFakeQuantAfterSubCell(nn.Cell):
    """
    Run a sub cell (or primitive) and fake-quantize its output.

    Args:
        subcell: The callable whose output is observed.
        **kwargs: `quant_dtype`, `quant_delay`, `per_channel`, `symmetric` and `narrow_range` are forwarded to
            the observer. If `optimize_option` contains `OptimizeOption.LEARNED_SCALE`, the observer runs in
            "LEARNED_SCALE" mode with an initial range of [-16, 16]; otherwise "DEFAULT" mode with [-6, 6].
    """

    def __init__(self, subcell, **kwargs):
        super().__init__(auto_prefix=False)
        self.subcell = subcell

        optimize_option = kwargs.get("optimize_option")
        learned_scale = optimize_option is not None and OptimizeOption.LEARNED_SCALE in optimize_option
        init_bound = 16 if learned_scale else 6
        self.mode = "LEARNED_SCALE" if learned_scale else "DEFAULT"
        self.max_init = init_bound
        self.min_init = -init_bound

        observer_kwargs = {
            key: kwargs.get(key)
            for key in ("quant_dtype", "quant_delay", "per_channel", "symmetric", "narrow_range")
        }
        self.fake_quant_act = quant.FakeQuantWithMinMaxObserver(min_init=self.min_init,
                                                                max_init=self.max_init,
                                                                ema=True,
                                                                mode=self.mode,
                                                                **observer_kwargs)

    def construct(self, *data):
        """Apply the sub cell to `data` and fake-quantize what it returns."""
        return self.fake_quant_act(self.subcell(*data))
class QuantizationAwareTraining(Quantizer):
r"""
Quantizer for quantization aware training.
Args:
bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True.
freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance.
Default: 1e7.
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
during train and eval. The first element represents weights and the second element represents data flow.
Default: (0, 0).
quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
element represents weights and the second element represents data flow. It is necessary to consider the
precision support of hardware devices in the practical quantization infer scenario.
Default: (QuantDtype.INT8, QuantDtype.INT8).
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
then base on per channel, otherwise base on per layer. The first element represents weights and the
second element represents data flow, and the second element must be `False` now. Default: (False, False).
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
element represents data flow. Default: (False, False).
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
The first element represents weights and the second element represents data flow.
Default: (False, False).
optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently
only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured,
`LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only work under some constraints, which
includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators
such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this
scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT.
one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True.
Supported Platforms:
``Ascend`` ``GPU``
Raises:
TypeError: If the element of `quant_delay` or `freeze_bn` is not int.
TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range`
is not bool.
TypeError: If the element of `quant_dtype` is not `QuantDtype`.
ValueError: If the length of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is
greater than 2.
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0.
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True).
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True).
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0).
Examples:
>>> from mindspore.compression.quant import QuantizationAwareTraining
>>> from mindspore import nn
>>> class LeNet5(nn.Cell):
... def __init__(self, num_class=10, channel=1):
... super(LeNet5, self).__init__()
... self.type = "fusion"
... self.num_class = num_class
...
... # change `nn.Conv2d` to `nn.Conv2dBnAct`
... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
... # change `nn.Dense` to `nn.DenseBnAct`
... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
... self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
... self.fc3 = nn.DenseBnAct(84, self.num_class)
...
... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
... self.flatten = nn.Flatten()
...
... def construct(self, x):
... x = self.conv1(x)
... x = self.max_pool2d(x)
... x = self.conv2(x)
... x = self.max_pool2d(x)
... x = self.flatten(x)
... x = self.fc1(x)
... x = self.fc2(x)
... x = self.fc3(x)
... return x
...
>>> net = LeNet5()
>>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
>>> net_qat = quantizer.quantize(net)
"""
__quant_op_name = ["Add", "Sub", "Mul", "RealDiv", "ReduceMean"]
def __init__(self,
             bn_fold=True,
             freeze_bn=10000000,
             quant_delay=(0, 0),
             quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
             per_channel=(False, False),
             symmetric=(False, False),
             narrow_range=(False, False),
             optimize_option=OptimizeOption.QAT,
             one_conv_fold=True):
    """
    Init for QuantizationAwareTraining quantizer.

    Each of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` and `narrow_range` may be a scalar or a
    list/tuple of one or two elements. Element 0 configures weights and element -1 configures activations, so a
    scalar (or one-element sequence) applies to both.

    Raises:
        ValueError: If one of the sequence arguments is empty or has more than two elements.
        ValueError: If `LEARNED_SCALE` is requested together with unsupported `symmetric`, `narrow_range`,
            `freeze_bn` or `quant_delay` settings.
    """
    super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option)

    def convert2list(name, value):
        """Normalise a scalar or a 1-2 element list/tuple into an indexable sequence."""
        if not isinstance(value, (list, tuple)):
            return [value]
        if not value:
            # Without this guard an empty sequence only fails later with an opaque IndexError on `[0]`.
            raise ValueError("input `{}` should not be empty".format(name))
        if len(value) > 2:
            raise ValueError("input `{}` len should less then 2".format(name))
        return value

    quant_delay_list = convert2list("quant delay", quant_delay)
    quant_dtype_list = convert2list("quant dtype", quant_dtype)
    per_channel_list = convert2list("per channel", per_channel)
    symmetric_list = convert2list("symmetric", symmetric)
    narrow_range_list = convert2list("narrow range", narrow_range)

    # Index 0 is the weight setting, index -1 the activation (data flow) setting.
    self.weight_qdelay = Validator.check_non_negative_int(quant_delay_list[0], "quant delay")
    self.act_qdelay = Validator.check_int(quant_delay_list[-1], 0, Rel.GE, "quant delay")
    self.bn_fold = Validator.check_bool(bn_fold, "bn fold")
    self.freeze_bn = Validator.check_non_negative_int(freeze_bn, "freeze bn")
    self.weight_dtype = Validator.check_isinstance("weights dtype", quant_dtype_list[0], QuantDtype)
    self.act_dtype = Validator.check_isinstance("activations dtype", quant_dtype_list[-1], QuantDtype)
    self.weight_channel = Validator.check_bool(per_channel_list[0], "per channel")
    self.act_channel = Validator.check_bool(per_channel_list[-1], "per channel")
    self.weight_symmetric = Validator.check_bool(symmetric_list[0], "symmetric")
    self.act_symmetric = Validator.check_bool(symmetric_list[-1], "symmetric")
    self.weight_range = Validator.check_bool(narrow_range_list[0], "narrow range")
    self.act_range = Validator.check_bool(narrow_range_list[-1], "narrow range")
    self.one_conv_fold = Validator.check_bool(one_conv_fold, "one conv fold")
    self._convert_method_map = {nn.Conv2dBnAct: self._convert_conv,
                                nn.DenseBnAct: self._convert_dense}
    self.mode = "DEFAULT"
    if OptimizeOption.LEARNED_SCALE in self.optimize_option:
        self.mode = "LEARNED_SCALE"
        if not self.weight_symmetric or not self.act_symmetric:
            raise ValueError("OptimizeOption.LEARNED_SCALE currently only support "
                             "symmetric=(True, True) for quant")
        if not self.weight_range or not self.act_range:
            raise ValueError("OptimizeOption.LEARNED_SCALE currently only support narrow_range=(True, True) "
                             "for quant")
        if self.freeze_bn != 0:
            raise ValueError("OptimizeOption.LEARNED_SCALE currently only support freeze_bn equal to 0, "
                             "but get freeze_bn={}".format(self.freeze_bn))
        if self.weight_qdelay != 0 or self.act_qdelay != 0:
            raise ValueError("OptimizeOption.LEARNED_SCALE currently only support quant_delay=(0, 0)")
    self.quant_config = create_quant_config(quant_delay=quant_delay_list,
                                            quant_dtype=quant_dtype_list,
                                            per_channel=per_channel_list,
                                            symmetric=symmetric_list,
                                            narrow_range=narrow_range_list,
                                            mode=self.mode)
    # Epsilon added to the BatchNorm variance when weights are rescaled for KL initialisation.
    self.eps = 1e-5
@staticmethod
def _convert_op_name(name):
pattern = re.compile(r'([A-Z]{1})')
name_new = re.sub(pattern, r'_\1', name).lower()
if name_new[0] == '_':
name_new = name_new[1:]
return name_new
def quantize(self, network):
    """
    Turn `network` into a quantization aware training network.

    Note:
        Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`.

    Args:
        network (Cell): network to be quantized.

    Returns:
        Cell, a quantization aware training network. The network is returned untouched when neither `QAT` nor
        `LEARNED_SCALE` is present in `self.optimize_option`.

    Raises:
        KeyError: If the `device_target` set in context is not "Ascend" or "GPU".
    """
    logger.warning("The compression module is deprecated and may not be supported in later version, please use "
                   "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")

    device_target = context.get_context('device_target')
    if device_target not in ("Ascend", "GPU"):
        raise KeyError("Unsupported {} device target.".format(device_target))

    quant_requested = any(option in self.optimize_option
                          for option in (OptimizeOption.QAT, OptimizeOption.LEARNED_SCALE))
    if not quant_requested:
        return network

    network.update_cell_prefix()
    network = self._convert_subcells2quant(network)
    network.update_cell_type("quant")
    return network
def _convert_subcells2quant(self, network):
"""
Recursively convert sub cells like `Conv2dBnAct` and `DenseBnAct` to quant cells.

The method works in two phases on `network`:

1. Every child cell that is a `Conv2dBnAct` or `DenseBnAct` is converted through `self._convert_method_map`
and re-inserted under the same name; every other child is processed recursively.
2. Unless `network` itself is one of the quantization cells listed below, each public attribute that is an
`ops.Primitive` whose name is in the white list `__quant_op_name` is replaced by an
`_AddFakeQuantAfterSubCell` wrapper, so its output gets fake-quantized.

Args:
network (Cell): The cell whose children are converted in place.

Returns:
Cell, the same `network` object after conversion.
"""
cells = network.name_cells()
change = False
for name in cells:
subcell = cells[name]
# Skip an entry that refers to the network itself.
if subcell == network:
continue
if isinstance(subcell, (nn.Conv2dBnAct, nn.DenseBnAct)):
# Remember the parameter prefix so the converted cell's parameters are renamed under the same prefix.
prefix = subcell.param_prefix
new_subcell = self._convert_method_map[type(subcell)](subcell)
new_subcell.update_parameters_name(prefix + '.')
network.insert_child_to_cell(name, new_subcell)
change = True
else:
self._convert_subcells2quant(subcell)
# A SequentialCell keeps its own `cell_list`; rebuild it from the current children after a replacement.
if isinstance(network, nn.SequentialCell) and change:
network.cell_list = list(network.cells())
# add FakeQuant OP after OP in white list, but not including those wrapped in the below quantization cell.
if isinstance(network, (nn.FakeQuantWithMinMaxObserver,
nn.Conv2dBnFoldQuantOneConv,
nn.Conv2dBnFoldQuant,
nn.Conv2dBnWithoutFoldQuant,
nn.Conv2dQuant,
nn.DenseQuant,
nn.ActQuant,
nn.TensorAddQuant,
nn.MulQuant)):
return network
# Collect the candidates first; `network.__dict__` is modified in the second loop below.
add_list = []
for name in network.__dict__:
# Attributes whose name starts with an underscore are not considered.
if name[0] == '_':
continue
attr = network.__dict__[name]
if isinstance(attr, ops.Primitive) and attr.name in self.__quant_op_name:
add_list.append((name, attr))
for name, prim_op in add_list:
prefix = name
add_quant = _AddFakeQuantAfterSubCell(prim_op,
quant_dtype=self.act_dtype,
quant_delay=self.act_qdelay,
per_channel=self.act_channel,
symmetric=self.act_symmetric,
narrow_range=self.act_range,
optimize_option=self.optimize_option)
# Qualify the wrapper's parameter names with the owning network's prefix, when it has one.
if network.param_prefix:
prefix = '.'.join([network.param_prefix, prefix])
add_quant.update_parameters_name(prefix + '.')
# Remove the plain attribute, then register the wrapper as a child cell under the same name.
del network.__dict__[name]
network.insert_child_to_cell(name, add_quant)
return network
def _convert_conv(self, subcell):
    """
    Replace the convolution (and BatchNorm, if any) inside a `Conv2dBnAct` with its quantization aware version.

    The quant cell is chosen as follows:

    - no BatchNorm: `Conv2dQuant`;
    - BatchNorm and `self.bn_fold`: `Conv2dBnFoldQuantOneConv` when `self.one_conv_fold`, else
      `Conv2dBnFoldQuant`;
    - BatchNorm without folding: `Conv2dBnWithoutFoldQuant`.

    The original weight, bias and BatchNorm parameters are handed over to the new cell, the BatchNorm is removed
    from `subcell`, and the activation is converted (or a FakeQuant is appended when `subcell.after_fake`).

    Args:
        subcell (Conv2dBnAct): The cell to convert in place.

    Returns:
        Conv2dBnAct, the same `subcell` object after conversion.

    Raises:
        ValueError: If `self.eps` is zero.
    """
    if self.eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")

    if OptimizeOption.LEARNED_SCALE in self.optimize_option:
        # Initialise the weight quantization range from the (BatchNorm-scaled) weights via KL threshold.
        weight_np = subcell.conv.weight.data.asnumpy()
        if subcell.has_bn:
            bn_gamma = subcell.batchnorm.gamma.data.asnumpy()
            bn_sigma = np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps)
            weight_np = weight_np * (bn_gamma / bn_sigma).reshape(-1, 1, 1, 1)
        kl_min, kl_max = self._kl_init(weight_np, self.weight_dtype)
        self.quant_config = self.quant_config._replace(
            weight=self.quant_config.weight.partial_init(min_init=kl_min, max_init=kl_max))

    origin_conv = subcell.conv
    # Keyword arguments shared by every quant convolution variant.
    shared_kwargs = dict(kernel_size=origin_conv.kernel_size,
                         stride=origin_conv.stride,
                         pad_mode=origin_conv.pad_mode,
                         padding=origin_conv.padding,
                         dilation=origin_conv.dilation,
                         group=origin_conv.group,
                         has_bias=origin_conv.has_bias,
                         quant_config=self.quant_config)

    if not subcell.has_bn:
        quant_conv = quant.Conv2dQuant(origin_conv.in_channels, origin_conv.out_channels,
                                       quant_dtype=self.weight_dtype, **shared_kwargs)
    else:
        origin_bn = subcell.batchnorm
        bn_kwargs = dict(shared_kwargs,
                         eps=origin_bn.eps,
                         momentum=1 - origin_bn.momentum,
                         bias_init=origin_conv.bias_init)
        if self.bn_fold:
            fold_kwargs = dict(bn_kwargs, quant_dtype=self.weight_dtype, fake=True)
            if self.one_conv_fold:
                quant_conv = quant.Conv2dBnFoldQuantOneConv(origin_conv.in_channels, origin_conv.out_channels,
                                                            **fold_kwargs)
            else:
                quant_conv = quant.Conv2dBnFoldQuant(origin_conv.in_channels, origin_conv.out_channels,
                                                     freeze_bn=self.freeze_bn, **fold_kwargs)
            # Folded cells hold the BatchNorm parameters directly on themselves.
            bn_holder = quant_conv
        else:
            quant_conv = quant.Conv2dBnWithoutFoldQuant(origin_conv.in_channels, origin_conv.out_channels,
                                                        **bn_kwargs)
            # The non-folded cell keeps them on its own `batchnorm` sub cell.
            bn_holder = quant_conv.batchnorm
        # change original network Batch Normalization OP parameters to quant network
        bn_holder.gamma = subcell.batchnorm.gamma
        bn_holder.beta = subcell.batchnorm.beta
        bn_holder.moving_mean = subcell.batchnorm.moving_mean
        bn_holder.moving_variance = subcell.batchnorm.moving_variance
        del subcell.batchnorm
        subcell.batchnorm = None
        subcell.has_bn = False

    # change original network Conv2D OP parameters to quant network
    quant_conv.weight = subcell.conv.weight
    if subcell.conv.has_bias:
        quant_conv.bias = subcell.conv.bias
    subcell.conv = quant_conv

    if subcell.has_act and subcell.activation is not None:
        subcell.activation = self._convert_activation(subcell.activation)
    elif subcell.after_fake:
        subcell.has_act = True
        subcell.activation = _AddFakeQuantAfterSubCell(F.identity,
                                                       quant_dtype=self.act_dtype,
                                                       quant_delay=self.act_qdelay,
                                                       per_channel=self.act_channel,
                                                       symmetric=self.act_symmetric,
                                                       narrow_range=self.act_range,
                                                       optimize_option=self.optimize_option)
    return subcell
def _convert_dense(self, subcell):
    """
    Replace the dense layer inside a `DenseBnAct` with `DenseQuant`.

    The original weight and bias are handed over to the new cell and the activation is converted (or a
    FakeQuant is appended when `subcell.after_fake`).

    Args:
        subcell (DenseBnAct): The cell to convert in place.

    Returns:
        DenseBnAct, the same `subcell` object after conversion.

    Raises:
        ValueError: If `self.eps` is zero.
    """
    if self.eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")
    if OptimizeOption.LEARNED_SCALE in self.optimize_option:
        subcell_weight_para = subcell.dense.weight.data.asnumpy()
        if subcell.has_bn:
            scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
                            np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
            # Broadcast the per-channel factor along axis 0 using the weight's own rank. A fixed
            # `(-1, 1, 1, 1)` reshape only suits 4-D convolution weights; against a lower-rank dense weight
            # it would broadcast the product up to 4-D and distort the statistics fed to `_kl_init`.
            # NOTE(review): assumes the output channel is axis 0 of the dense weight - confirm against nn.Dense.
            broadcast_shape = [-1] + [1] * (subcell_weight_para.ndim - 1)
            subcell_weight_para = subcell_weight_para * scale_factor.reshape(broadcast_shape)
        min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
        self.quant_config = self.quant_config._replace(
            weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))

    dense_inner = subcell.dense
    dense_inner = quant.DenseQuant(dense_inner.in_channels,
                                   dense_inner.out_channels,
                                   has_bias=dense_inner.has_bias,
                                   quant_config=self.quant_config,
                                   quant_dtype=self.weight_dtype)
    # change original network Dense OP parameters to quant network
    dense_inner.weight = subcell.dense.weight
    if subcell.dense.has_bias:
        dense_inner.bias = subcell.dense.bias
    subcell.dense = dense_inner

    if subcell.has_act and subcell.activation is not None:
        subcell.activation = self._convert_activation(subcell.activation)
    elif subcell.after_fake:
        subcell.has_act = True
        subcell.activation = _AddFakeQuantAfterSubCell(F.identity,
                                                       quant_dtype=self.act_dtype,
                                                       quant_delay=self.act_qdelay,
                                                       per_channel=self.act_channel,
                                                       symmetric=self.act_symmetric,
                                                       narrow_range=self.act_range,
                                                       optimize_option=self.optimize_option)
    return subcell
def _convert_activation(self, activation):
    """
    Wrap an activation cell into `ActQuant`.

    `ReLU`, `ReLU6` and `Sigmoid` are wrapped directly. `LeakyReLU`, `HSigmoid` and `HSwish` are additionally
    wrapped with `ema=True` and `fake_before=True`. The class is matched exactly, not via `isinstance`.

    Args:
        activation (Cell): The activation cell to convert.

    Returns:
        ActQuant, the quantization aware activation.

    Raises:
        ValueError: If the activation class is not one of the supported ones.
    """
    act_class = activation.__class__
    if act_class in (nn.ReLU, nn.ReLU6, nn.Sigmoid):
        extra_kwargs = {}
    elif act_class in (nn.LeakyReLU, nn.HSigmoid, nn.HSwish):
        extra_kwargs = {"ema": True, "fake_before": True}
    else:
        raise ValueError("Unsupported activation in auto quant: ", act_class)
    return quant.ActQuant(activation=activation,
                          quant_config=self.quant_config,
                          quant_dtype=self.act_dtype,
                          **extra_kwargs)
def _kl_init(self, subcell_weight_para, weight_dtype):
    """
    Derive symmetric `min_init`/`max_init` lists from the weights with `compute_kl_threshold`.

    With per-channel weight quantization (`self.weight_channel`) one threshold is computed for every slice
    along the first axis of `subcell_weight_para`; otherwise a single threshold covers the whole array.

    Returns:
        tuple(list, list), `(min_init, max_init)` where every `min_init` entry is the negated `max_init` entry.
    """
    if self.weight_channel:
        groups = subcell_weight_para
    else:
        groups = [subcell_weight_para]
    max_init = [compute_kl_threshold(group, weight_dtype) for group in groups]
    min_init = [-bound for bound in max_init]
    return min_init, max_init
def _set_mixed_bits(self, network, strategy):
    r"""
    Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE`
    optimize_option.

    Args:
        network (Cell): Input network.
        strategy (list): The quantization strategy for layers that need to be quantified (eg. [[8], [8],
            ..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the
            convolution layer is supported. Entry `k` applies to the `k`-th `Conv2dBnAct`/`DenseBnAct` met
            while traversing `network.cells_and_names()`.

    Returns:
        Cell, a network with mixed bit strategy configured.

    Raises:
        ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
        ValueError: If the number of quantizable layers differs from `len(strategy)`.
        ValueError: If a strategy entry is not a bit width between 2 and 8, or `self.eps` is zero.
    """
    if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
        raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
                         "optimize_option.")

    quantizable_types = (nn.Conv2dBnAct, nn.DenseBnAct)
    quantizable_idx = [i for i, cell_and_name in enumerate(network.cells_and_names())
                       if isinstance(cell_and_name[1], quantizable_types)]
    if len(quantizable_idx) != len(strategy):
        raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.")
    # Traversal index -> strategy entry; a dict gives O(1) membership tests in the loop below.
    quantizable_layer_bit_dict = dict(zip(quantizable_idx, strategy))

    type_map = {
        QuantDtype.INT2.num_bits: QuantDtype.INT2,
        QuantDtype.INT3.num_bits: QuantDtype.INT3,
        QuantDtype.INT4.num_bits: QuantDtype.INT4,
        QuantDtype.INT5.num_bits: QuantDtype.INT5,
        QuantDtype.INT6.num_bits: QuantDtype.INT6,
        QuantDtype.INT7.num_bits: QuantDtype.INT7,
        QuantDtype.INT8.num_bits: QuantDtype.INT8
    }
    if self.eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")

    for i, cell_and_name in enumerate(network.cells_and_names()):
        if i not in quantizable_layer_bit_dict:
            continue
        cell = cell_and_name[1]
        if not isinstance(cell, quantizable_types):
            continue
        num_bits = quantizable_layer_bit_dict[i][0]
        cell.weight_dtype = type_map.get(num_bits)
        if cell.weight_dtype is None:
            raise ValueError("Input strategy is invalid: ", num_bits)

        # The inner layer carrying the weight and its fake-quant observer.
        inner = cell.conv if isinstance(cell, nn.Conv2dBnAct) else cell.dense
        subcell_weight_para = inner.weight.data.asnumpy()
        if hasattr(inner, 'gamma'):
            scale_factor = (inner.gamma.data.asnumpy() /
                            np.sqrt(inner.moving_variance.data.asnumpy() + self.eps))
            # Broadcast along axis 0 using the weight's own rank: `(-1, 1, 1, 1)` for 4-D convolution
            # weights, and a matching lower-rank shape for dense weights instead of inflating them to 4-D.
            broadcast_shape = [-1] + [1] * (subcell_weight_para.ndim - 1)
            subcell_weight_para = subcell_weight_para * scale_factor.reshape(broadcast_shape)
        min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
        inner.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
                                      min_init=min_init,
                                      max_init=max_init)
    return network

View File

@ -1,462 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Quantization utils.
Note: This is an experimental interface that is subject to change and/or deletion.
"""
from __future__ import absolute_import
import numpy as np
from mindspore._checkparam import Validator
from mindspore import log as logger
from ... import nn
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"]
def cal_quantization_params(input_min,
                            input_max,
                            quant_min,
                            quant_max,
                            data_type,
                            symmetric=False):
    r"""
    Calculate quantization params for scale and zero point.

    The input range is first widened to contain zero. When `symmetric` is set it is also made symmetric around
    zero. The scale is then `(input_max - input_min) / (quant_max - quant_min)`.

    Args:
        input_min (numpy.ndarray): The dimension of channel or 1.
        input_max (numpy.ndarray): The dimension of channel or 1.
        quant_min (int): The minimum quantization integer.
        quant_max (int): The maximum quantization integer.
        data_type (numpy type) : Can be numpy int8, numpy uint8.
        symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.

    Returns:
        scale (numpy.ndarray): quantization param.
        zero point (numpy.ndarray): quantization param.

    Raises:
        ValueError: If `quant_min` equals `quant_max`, the input shapes differ or are not one-dimensional, or
            any element of the computed scale is zero while a zero point has to be derived from it.
    """
    if quant_min == quant_max:
        raise ValueError("quant_max is equal to quant_min which will lead to divide zero error.")

    # The representable range must always contain zero.
    input_max = np.maximum(0.0, input_max)
    input_min = np.minimum(0.0, input_min)

    if input_min.shape != input_max.shape:
        raise ValueError("input min shape should be equal to input max.")

    if len(input_min.shape) > 1:
        raise ValueError("input min and max shape should be one dim.")

    if (input_min > input_max).all():
        raise ValueError("input_min min should be less than input max.")

    if (input_max == input_min).all():
        return np.ones(input_min.shape), np.zeros(input_min.shape)

    # calculate scale
    if symmetric:
        input_max = np.maximum(-input_min, input_max)
        input_min = -input_max
    scale = (input_max - input_min) / (quant_max - quant_min)

    # calculate zero point
    if data_type == np.int8 and symmetric:
        zp = np.zeros(input_min.shape)
    else:
        # `scale` holds one entry per channel; test element-wise, since the truth value of a
        # multi-element array is ambiguous and would raise before the intended check runs.
        if (scale == 0.0).any():
            raise ValueError("scale can not be 0.")
        zp_double = quant_min - input_min / scale
        zp = np.floor(zp_double + 0.5)

    return scale, zp
def get_quant_min_max(data_type, num_bits=8, narrow_range=False):
    """
    Return the `(quant_min, quant_max)` integer range for a quantized data type.

    For `numpy.int8` the range is signed, `[-2**(num_bits-1), 2**(num_bits-1) - 1]`; for `numpy.uint8` it is
    `[0, 2**num_bits - 1]`. With `narrow_range` the lower bound is raised by one.

    Raises:
        ValueError: If `data_type` is neither `numpy.int8` nor `numpy.uint8`.
    """
    if data_type == np.int8:
        half_span = 2 ** (num_bits - 1)
        lower, upper = 0 - half_span, half_span - 1
    elif data_type == np.uint8:
        lower, upper = 0, 2 ** num_bits - 1
    else:
        raise ValueError("Unsupported datatype({})".format(data_type))
    if narrow_range:
        lower += 1
    return lower, upper
def weight2int(data, scale, zero_point, quant_min, quant_max):
    r"""
    Calculate int8/uint8 weight from fp32. the formula is defined as:

    .. math::
        int8/uint8 = round(float/scale) + offset

    The result is clamped to `[quant_min, quant_max]`. A per-channel `scale` (more than one element) is
    matched against axis 0 of `data` first and against axis 1 otherwise.

    Args:
        data (numpy.ndarray): The dimension of channel or 1. Should be NCHW.
        scale (numpy.ndarray): The dimension of channel or 1.
        zero_point (numpy.ndarray): The dimension of channel or 1.
        quant_min (int): The minimum quantization integer.
        quant_max (int): The maximum quantization integer.

    Returns:
        weight (numpy.ndarray): The dimension of channel or 1.

    Raises:
        ValueError: If `scale` and `zero_point` differ in shape, `scale` is empty or contains zero, or a
            per-channel `scale` matches neither axis 0 nor axis 1 of `data`.
    """
    if scale.shape != zero_point.shape:
        raise ValueError("`scale` and `zero_point` should have the same shape.")
    # A shape entry is never negative, so emptiness is what this guard has to detect. Using `size`
    # also keeps 0-d (scalar) arrays working instead of failing on `shape[0]`.
    if scale.size == 0:
        raise ValueError("`scale` and `zero_point` shape should be greater than zero.")
    if (scale == 0).any():
        raise ValueError("Zero exist in `scale` which will lead to divide zero error.")

    if scale.ndim >= 1 and scale.shape[0] > 1:
        # for perchannel
        if scale.shape[0] == data.shape[0]:
            # `Conv2d` or `Dense` op weight
            shape_list = [-1] + [1] * len(data.shape[1:])
            scale = scale.reshape(shape_list)
            zero_point = zero_point.reshape(shape_list)
        elif data.ndim > 1 and scale.shape[0] == data.shape[1]:
            # `DepthwiseConv2d` op weight
            shape_list = [1, -1] + [1] * len(data.shape[2:])
            scale = scale.reshape(shape_list)
            zero_point = zero_point.reshape(shape_list)
        else:
            raise ValueError("Unsupported weight shape({})".format(data.shape))

    weight_int = np.round((data / scale) + zero_point)
    weight_int[weight_int > quant_max] = quant_max
    weight_int[weight_int < quant_min] = quant_min
    return weight_int
def scale_zp_max_min_from_fake_quant_cell(cell, data_type):
    """
    Read `minq`/`maxq` from a fake-quant cell and derive the matching scale and zero point.

    Args:
        cell (Cell): Cell exposing `minq`, `maxq`, `mode`, `num_bits`, `narrow_range`, `symmetric` and
            `neg_trunc`, e.g. a `FakeQuantWithMinMaxObserver`.
        data_type (numpy type): Target integer type, forwarded to `get_quant_min_max` and
            `cal_quantization_params`.

    Returns:
        tuple, `(scale, zero_point, maxq, minq)`.
    """
    range_min = cell.minq.data.asnumpy()
    range_max = cell.maxq.data.asnumpy()

    # make sure maxq > 0 and minq <= 0
    if cell.mode == 'LEARNED_SCALE':
        range_max, range_min = np.abs(range_max), -np.abs(range_min)

    int_min, int_max = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range)
    # Negative truncation disables the symmetric treatment.
    symmetric = cell.symmetric and not cell.neg_trunc
    scale, zero_point = cal_quantization_params(range_min, range_max, int_min, int_max, data_type,
                                                symmetric=symmetric)
    return scale, zero_point, range_max, range_min
def fold_batchnorm(weight, cell_quant):
    r"""
    Fold the batchnorm in `Conv2dBnFoldQuant` to weight.

    The BatchNorm statistics are read directly from `cell_quant` (`gamma`, `beta`, `moving_mean`,
    `moving_variance`, `eps`). The channel axis is axis 0 of `weight` when its length matches `gamma`,
    otherwise axis 1.

    Args:
        weight (numpy.ndarray): Weight of `cell_quant`.
        cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`.

    Returns:
        weight (numpy.ndarray): Folded weight.
        bias (numpy.ndarray): Folded bias.

    Raises:
        ValueError: If `cell_quant.eps` is zero or `gamma` matches neither axis 0 nor axis 1 of `weight`.
    """
    moving_var = cell_quant.moving_variance.data.asnumpy()
    moving_mean = cell_quant.moving_mean.data.asnumpy()
    gamma = cell_quant.gamma.data.asnumpy()
    beta = cell_quant.beta.data.asnumpy()
    eps = cell_quant.eps
    if eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")
    sigma = np.sqrt(moving_var + eps)

    channels = gamma.shape[0]
    if channels == weight.shape[0]:
        # `Conv2d` or `Dense` op weight
        broadcast_shape = [-1] + [1] * (len(weight.shape) - 1)
    elif channels == weight.shape[1]:
        # `DepthwiseConv2d` op weight
        broadcast_shape = [1, -1] + [1] * len(weight.shape[2:])
    else:
        raise ValueError("Unsupported weight shape({})".format(weight.shape))

    folded_weight = weight * gamma.reshape(broadcast_shape) / sigma.reshape(broadcast_shape)
    folded_bias = beta - gamma * moving_mean / sigma
    return folded_weight, folded_bias
def without_fold_batchnorm(weight, cell_quant):
    r"""
    Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight.

    The BatchNorm statistics are read from `cell_quant.batchnorm` (`gamma`, `beta`, `moving_mean`,
    `moving_variance`, `eps`). The channel axis is axis 0 of `weight` when its length matches `gamma`,
    otherwise axis 1.

    Args:
        weight (numpy.ndarray): Weight of `cell_quant`.
        cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`.

    Returns:
        weight (numpy.ndarray): without folded weight.
        bias (numpy.ndarray): without folded bias.

    Raises:
        ValueError: If the BatchNorm `eps` is zero or `gamma` matches neither axis 0 nor axis 1 of `weight`.
    """
    bn_cell = cell_quant.batchnorm
    moving_var = bn_cell.moving_variance.data.asnumpy()
    moving_mean = bn_cell.moving_mean.data.asnumpy()
    gamma = bn_cell.gamma.data.asnumpy()
    beta = bn_cell.beta.data.asnumpy()
    eps = bn_cell.eps
    if eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")
    sigma = np.sqrt(moving_var + eps)

    channels = gamma.shape[0]
    if channels == weight.shape[0]:
        # `Conv2d` or `Dense` op weight
        broadcast_shape = [-1] + [1] * (len(weight.shape) - 1)
    elif channels == weight.shape[1]:
        # `DepthwiseConv2d` op weight
        broadcast_shape = [1, -1] + [1] * len(weight.shape[2:])
    else:
        raise ValueError("Unsupported weight shape({})".format(weight.shape))

    folded_weight = weight * gamma.reshape(broadcast_shape) / sigma.reshape(broadcast_shape)
    folded_bias = beta - gamma * moving_mean / sigma
    return folded_weight, folded_bias
def compute_kl_threshold(data, bitwidth):
    r"""
    Using KL-J Distance to calculate the clip threshold.

    A normalised histogram of `|data|` is built and, for every candidate clipping bin `i`, the cumulative
    distribution is squeezed to `2 ** (num_bits - 1)` points and expanded back by linear interpolation. The
    candidate whose reconstructed distribution has the smallest divergence from the original one is chosen.

    Args:
        - **data** (NumpyArray) - Data observed to calculate the threshold for quantization,
        - **bitwidth** (QuantDtype) - The datatype of quantization. Only its `num_bits` attribute is read.

    Outputs:
        float, the clip threshold, never smaller than 1e-5. When the histogram has too few bins for the
        requested bit width, the upper histogram edge is returned.

    Raises:
        ValueError: If a reconstructed cumulative distribution contains zero.
    """
    abs_data = np.abs(data)
    data_max = abs_data.max()
    if data_max < 1e-5:
        return 1e-5
    hist, bin_edges = np.histogram(abs_data, bins='sqrt', range=(0, data_max), density=True)
    # For the sake of high efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode, If it exceeds the
    # largest size, turn to use the default bins config.
    largest_bin_size = 1024
    if hist.shape[0] > largest_bin_size:
        hist, bin_edges = np.histogram(abs_data, range=(0, data_max), density=True)
    sum_ = np.sum(hist)
    if sum_ == 0:
        hist = 0
    else:
        hist = hist / sum_
    cumsum = np.cumsum(hist)
    bit_pow_range = pow(2, int(bitwidth.num_bits) - 1)
    if bit_pow_range + 1 > len(bin_edges) - 1:
        # Fewer bins than quantization levels: nothing to search, use the upper histogram edge.
        return float(bin_edges[-1])

    bin_width = bin_edges[1] - bin_edges[0]
    # Same grid for every candidate, so build it once.
    coarse_grid = np.linspace(0.0, 1.0, bit_pow_range)
    threshold = []
    kl = []
    for i in range(bit_pow_range + 1, len(bin_edges), 1):
        threshold.append((i + 0.5) * bin_width)
        # forward interpolation
        cumsum_tmp = np.copy(cumsum)
        cumsum_tmp[(i - 1):] = 1
        fine_grid = np.linspace(0.0, 1.0, i)
        forward_interp = np.interp(coarse_grid, fine_grid, cumsum_tmp[:i])
        # backward interpolation
        backward_interp = np.interp(fine_grid, coarse_grid, forward_interp)
        cumsum_tmp[:i] = backward_interp
        if 0 in cumsum_tmp:
            raise ValueError("Zero exist in `cumsum_tmp` which will lead to divide zero error")
        kl.append(np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)))  # Kullback-Leibler-J
    # `np.argmin` returns the first minimum, i.e. the smallest candidate threshold on ties.
    threshold = float(threshold[int(np.argmin(kl))])
    if threshold < 1e-5:
        threshold = 1e-5
    return threshold
def query_quant_layers(network):
    r"""
    Report the quantization data type of every fake-quant observer in a network.

    Each sub-cell that is an `nn.FakeQuantWithMinMaxObserver` is reported through `logger.info` as
    ``<cell name>\t<quant dtype>``. The query walks the cell tree as it is before graph compile
    optimization in graph mode, so redundant quantized layers that do not exist in practical
    execution may be listed.

    Args:
        network (Cell): input network

    Examples:
        >>> from mindspore.compression.quant import QuantizationAwareTraining
        >>> from mindspore.compression.quant.quant_utils import query_quant_layers
        >>> class LeNet5(nn.Cell):
        ...     def __init__(self, num_class=10, channel=1):
        ...         super(LeNet5, self).__init__()
        ...         self.type = "fusion"
        ...         self.num_class = num_class
        ...
        ...         # change `nn.Conv2d` to `nn.Conv2dBnAct`
        ...         self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
        ...         self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
        ...         # change `nn.Dense` to `nn.DenseBnAct`
        ...         self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
        ...         self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
        ...         self.fc3 = nn.DenseBnAct(84, self.num_class)
        ...
        ...         self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        ...         self.flatten = nn.Flatten()
        ...
        ...     def construct(self, x):
        ...         x = self.conv1(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.conv2(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.flatten(x)
        ...         x = self.fc1(x)
        ...         x = self.fc2(x)
        ...         x = self.fc3(x)
        ...         return x
        ...
        >>> net = LeNet5()
        >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
        >>> net_qat = quantizer.quantize(net)
        >>> query_quant_layers(net_qat)
        conv1.conv.fake_quant_weight                                       INT8
        conv1.activation.fake_quant_act                                    INT8
        conv2.conv.fake_quant_weight                                       INT8
        conv2.activation.fake_quant_act                                    INT8
        fc1.dense.fake_quant_weight                                        INT8
        fc1.activation.fake_quant_act                                      INT8
        fc2.dense.fake_quant_weight                                        INT8
        fc2.activation.fake_quant_act                                      INT8
        fc3.dense.fake_quant_weight                                        INT8
        fc3.activation.fake_quant_act                                      INT8
    """
    network = Validator.check_isinstance("network", network, nn.Cell)
    # Name column padded to 60 characters, dtype column to 10.
    row_format = "{0:60}\t{1:10}"
    for cell_name, cell in network.cells_and_names():
        if not isinstance(cell, nn.FakeQuantWithMinMaxObserver):
            continue
        logger.info(row_format.format(cell_name, cell.quant_dtype))
def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None):
    r"""
    Load fp32 model parameters into quantization model.

    Parameters are paired by the last dot-separated segment of their name: for every such key, the
    checkpoint entries whose full name ends with that key are consumed in order, one per matching
    parameter of `quant_model`. After loading, cells whose weight fake-quant runs in
    ``LEARNED_SCALE`` mode get their min/max re-initialised from a KL threshold of the weights.

    Args:
        quant_model(Cell): Quantization model.
        params_dict(dict): Parameter dict that stores fp32 parameters.
        quant_new_params(list): Parameters that exist in quantization network but not in non-quantization
            network. Default: None, which is treated as "no such parameters".

    Raises:
        TypeError: If `quant_new_params` is not None and is not list.
        ValueError: If there are parameters in the `quant_model` that are neither in `params_dict`
            nor in `quant_new_params`.

    Examples:
        >>> import mindspore as ms
        >>> from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
        >>> class LeNet5(nn.Cell):
        ...     def __init__(self, num_class=10, channel=1):
        ...         super(LeNet5, self).__init__()
        ...         self.type = "fusion"
        ...         self.num_class = num_class
        ...
        ...         # change `nn.Conv2d` to `nn.Conv2dBnAct`
        ...         self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
        ...         self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
        ...         # change `nn.Dense` to `nn.DenseBnAct`
        ...         self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
        ...         self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
        ...         self.fc3 = nn.DenseBnAct(84, self.num_class)
        ...
        ...         self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        ...         self.flatten = nn.Flatten()
        ...
        ...     def construct(self, x):
        ...         x = self.conv1(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.conv2(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.flatten(x)
        ...         x = self.fc1(x)
        ...         x = self.fc2(x)
        ...         x = self.fc3(x)
        ...         return x
        ...
        >>> net = LeNet5()
        >>> ckpt_file_name = "./checkpoint/LeNet5_noquant-1_32.ckpt"
        >>> param_dict = ms.load_checkpoint(ckpt_file_name)
        >>> load_nonquant_param_into_quant_net(net, param_dict)
    """
    if quant_new_params is not None and not isinstance(quant_new_params, list):
        raise TypeError("quant_new_params must be list or None.")
    # With the default `None`, a membership test on `quant_new_params` itself would raise TypeError
    # instead of the documented ValueError, so normalise it to an empty list first.
    new_param_names = [] if quant_new_params is None else quant_new_params

    def _checkpoint_items_ending_with(suffix):
        """Return an iterator over the checkpoint items whose full name ends with `suffix`."""
        return iter([item for item in params_dict.items() if item[0].endswith(suffix)])

    # These three keys are always registered, even when the checkpoint holds none of them.
    iterable_dict = {suffix: _checkpoint_items_ending_with(suffix) for suffix in ('minq', 'maxq', 'quant_max')}
    for ckpt_name in params_dict:
        key_name = ckpt_name.split(".")[-1]
        if key_name not in iterable_dict:
            iterable_dict[key_name] = _checkpoint_items_ending_with(key_name)

    for name, param in quant_model.parameters_and_names():
        key_name = name.split(".")[-1]
        if key_name not in iterable_dict:
            if key_name not in new_param_names:
                raise ValueError(f"Can't find match parameter in ckpt, param name = {name}")
            continue
        # An exhausted iterator yields None: the model parameter keeps its current value.
        value_param = next(iterable_dict[key_name], None)
        if value_param:
            param.set_data(value_param[1].data)
            logger.info(f'init model param {name} with checkpoint param {value_param[0]}')

    # Perform KL_init when learned scale quantization is executed.
    for _, cell in quant_model.cells_and_names():
        if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant,
                             nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE":
            subcell_weight_para = cell.weight.data.asnumpy()
            if hasattr(cell, 'gamma'):
                # Scale the weights by gamma / sqrt(moving_variance + 1e-5) along the first axis
                # (presumably folding batch-norm into the conv weight -- confirm against the cell definition).
                scale_factor = (cell.gamma.data.asnumpy() /
                                np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5))
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)

            quant_dtype = cell.fake_quant_weight.quant_dtype
            if cell.fake_quant_weight.per_channel:
                # One threshold per slice along the first axis of the weight array.
                max_init = [compute_kl_threshold(weight_para_each, quant_dtype)
                            for weight_para_each in subcell_weight_para]
            else:
                max_init = [compute_kl_threshold(subcell_weight_para, quant_dtype)]
            # The range is initialised symmetrically around zero.
            min_init = [-x for x in max_init]

            cell.fake_quant_weight.reset(quant_dtype=quant_dtype,
                                         min_init=min_init, max_init=max_init)

View File

@ -1,68 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Note:
Base Class of Quantizer. This is interface that is subject to change or deletion.
"""
from __future__ import absolute_import
from abc import ABC, abstractmethod
from enum import Enum
from mindspore._checkparam import Validator
__all__ = ["OptimizeOption"]
class OptimizeOption(Enum):
    r"""
    Enumeration of the optimize options for model quantization; only `QAT` and `LEARNED_SCALE` are supported.
    """
    # Quantization aware training.
    QAT = "QAT"
    # Learned scale quantization.
    LEARNED_SCALE = "LEARNED_SCALE"

    def __str__(self):
        """Render the member as its plain string value."""
        return f"{self.value}"
class Quantizer(ABC):
    """
    Abstract base of all quantizers; subclasses implement `quantize` to produce their own quantization result.

    Note:
        This class is an abstract class.

    Args:
        optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options. A single
            option is wrapped into a one-element list. Default: OptimizeOption.QAT.
    """

    def __init__(self,
                 optimize_option=OptimizeOption.QAT):
        if isinstance(optimize_option, (list, tuple)):
            options = optimize_option
        else:
            options = [optimize_option]
        # Every entry must be an OptimizeOption member.
        for candidate in options:
            Validator.check_isinstance("optimize_option", candidate, OptimizeOption)
        self.optimize_option = options

    @abstractmethod
    def quantize(self, network):
        """
        Quant API that converts the input network into a quantization aware training network.

        Args:
            network (Cell): network to be quantized.
        """

View File

@ -20,7 +20,7 @@ The high-level components(Cells) used to construct the neural network.
from __future__ import absolute_import from __future__ import absolute_import
from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \ from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \
image, quant, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense image, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
from mindspore.nn.layer.activation import * from mindspore.nn.layer.activation import *
from mindspore.nn.layer.normalization import * from mindspore.nn.layer.normalization import *
from mindspore.nn.layer.container import * from mindspore.nn.layer.container import *
@ -32,7 +32,6 @@ from mindspore.nn.layer.basic import *
from mindspore.nn.layer.embedding import * from mindspore.nn.layer.embedding import *
from mindspore.nn.layer.pooling import * from mindspore.nn.layer.pooling import *
from mindspore.nn.layer.image import * from mindspore.nn.layer.image import *
from mindspore.nn.layer.quant import *
from mindspore.nn.layer.math import * from mindspore.nn.layer.math import *
from mindspore.nn.layer.combined import * from mindspore.nn.layer.combined import *
from mindspore.nn.layer.timedistributed import * from mindspore.nn.layer.timedistributed import *
@ -53,7 +52,6 @@ __all__.extend(basic.__all__)
__all__.extend(embedding.__all__) __all__.extend(embedding.__all__)
__all__.extend(pooling.__all__) __all__.extend(pooling.__all__)
__all__.extend(image.__all__) __all__.extend(image.__all__)
__all__.extend(quant.__all__)
__all__.extend(math.__all__) __all__.extend(math.__all__)
__all__.extend(combined.__all__) __all__.extend(combined.__all__)
__all__.extend(timedistributed.__all__) __all__.extend(timedistributed.__all__)

File diff suppressed because it is too large Load Diff

View File

@ -25,7 +25,6 @@ import stat
import threading import threading
from threading import Thread, Lock from threading import Thread, Lock
from collections import defaultdict, OrderedDict from collections import defaultdict, OrderedDict
from functools import wraps
from io import BytesIO from io import BytesIO
import math import math
@ -52,7 +51,6 @@ from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor from mindspore.common.tensor import Tensor
from mindspore.common._utils import is_shape_unknown from mindspore.common._utils import is_shape_unknown
from mindspore.communication.management import get_rank, get_group_size from mindspore.communication.management import get_rank, get_group_size
from mindspore.compression.export import quant_export
from mindspore.experimental import MapParameter from mindspore.experimental import MapParameter
from mindspore.parallel._cell_wrapper import get_allgather_cell from mindspore.parallel._cell_wrapper import get_allgather_cell
from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index
@ -1123,12 +1121,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
kwargs (dict): Configuration options dictionary. kwargs (dict): Configuration options dictionary.
- quant_mode (str): If the network is a quantization aware training network, the quant_mode should
be set to "QUANT", else the quant_mode should be set to "NONQUANT".
- mean (float): The mean of input data after preprocessing, used for quantizing the first layer of network.
Default: 127.5.
- std_dev (float): The variance of input data after preprocessing,
used for quantizing the first layer of the network. Default: 127.5.
- enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32. - enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32.
- enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set. - enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set.
@ -1192,7 +1184,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
inputs = tuple(inputs_col) inputs = tuple(inputs_col)
file_name = os.path.realpath(file_name) file_name = os.path.realpath(file_name)
net = _quant_export(net, *inputs, file_format=file_format, **kwargs)
if 'enc_key' in kwargs.keys(): if 'enc_key' in kwargs.keys():
kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs) kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs)
_export(net, file_name, file_format, *inputs, **kwargs) _export(net, file_name, file_format, *inputs, **kwargs)
@ -1560,62 +1551,6 @@ def _save_dataset_to_mindir(model, dataset):
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False
def quant_mode_manage(func):
    """
    Decorator that keeps the `quant_mode` values of old versions working.

    The wrapped function is only called when `quant_mode` is present in the keyword arguments;
    otherwise the network is returned untouched. The old values 'AUTO' and 'MANUAL' are rewritten
    to 'QUANT' before the call.

    Raises:
        TypeError: If `quant_mode` is given but is not a string.
    """
    legacy_modes = ('AUTO', 'MANUAL')

    @wraps(func)
    def wrapper(network, *inputs, file_format, **kwargs):
        try:
            requested_mode = kwargs['quant_mode']
        except KeyError:
            # No quantization requested at all: hand the network back as is.
            return network
        if not isinstance(requested_mode, str):
            raise TypeError("For 'export', the type of 'quant_mode' should be string, "
                            "but got {}.".format(type(requested_mode)))
        if requested_mode in legacy_modes:
            kwargs['quant_mode'] = 'QUANT'
        return func(network, *inputs, file_format=file_format, **kwargs)

    return wrapper
@quant_mode_manage
def _quant_export(network, *inputs, file_format, **kwargs):
    """
    Exports MindSpore quantization predict model to deploy with AIR and MINDIR.

    `quant_mode` is read from `kwargs` unconditionally; the `quant_mode_manage` decorator only
    forwards calls that carry it. For 'NONQUANT' the network is returned unchanged; for 'QUANT' a
    deep copy is converted with `quant_export.ExportToQuantInferNetwork` and the result returned.

    Raises:
        KeyError: If `quant_mode` is not 'QUANT' or 'NONQUANT', or the device target is unsupported.
        ValueError: If `file_format` is neither 'AIR' nor 'MINDIR'.
    """
    supported_device = ["Ascend", "GPU"]
    supported_formats = ['AIR', 'MINDIR']
    quant_mode_formats = ['QUANT', 'NONQUANT']

    quant_mode = kwargs['quant_mode']
    if quant_mode not in quant_mode_formats:
        raise KeyError(f"For 'export', the argument 'quant_mode' must be one of {quant_mode_formats}, "
                       f"but got {quant_mode}.")
    if quant_mode == 'NONQUANT':
        return network

    # Work on a copy so the caller's network is not modified by the conversion.
    quant_net = copy.deepcopy(network)
    quant_net._create_time = int(time.time() * 1e9)

    # Missing or explicit-None statistics both fall back to 127.5.
    mean = kwargs.get('mean', None)
    if mean is None:
        mean = 127.5
    std_dev = kwargs.get('std_dev', None)
    if std_dev is None:
        std_dev = 127.5
    mean = Validator.check_value_type("mean", mean, (int, float))
    std_dev = Validator.check_value_type("std_dev", std_dev, (int, float))

    if context.get_context('device_target') not in supported_device:
        raise KeyError(f"For 'export', quant export only support {supported_device} device target now, "
                       f"but got {context.get_context('device_target')}")
    if file_format not in supported_formats:
        raise ValueError(f"For 'export', quant export only support 'file_format' {supported_formats}, "
                         f"but got {file_format}.")

    quant_net.set_train(False)
    if file_format != "MINDIR":
        exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs)
    else:
        exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs, is_mindir=True)
    return exporter.run()
def parse_print(print_file_name): def parse_print(print_file_name):
""" """
Parse data file generated by mindspore.ops.Print. Parse data file generated by mindspore.ops.Print.

View File

@ -1,31 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in test_lenet_quant.py
"""
from easydict import EasyDict as edict
# Settings shared by the LeNet quantization test (test_lenet_quant.py). The test reads them by
# attribute (cfg.lr) and by key (cfg['epoch_size']).
quant_cfg = edict({
# Passed to the LeNet5 constructor as the number of output classes.
'num_classes': 10,
# Learning rate and momentum handed to nn.Momentum.
'lr': 0.01,
'momentum': 0.9,
# Number of epochs given to model.train; also scales the checkpoint save interval.
'epoch_size': 10,
# Batch size passed to create_dataset for both the train and the test split.
'batch_size': 64,
# NOTE(review): not referenced by the visible test code -- confirm whether it is still needed.
'buffer_size': 1000,
# Height/width of the dummy input tensor built for export.
'image_height': 32,
'image_width': 32,
# Forwarded to CheckpointConfig(keep_checkpoint_max=...).
'keep_checkpoint_max': 10,
})

View File

@ -1,60 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Produce the dataset
"""
import mindspore.dataset as ds
import mindspore.dataset.vision as CV
import mindspore.dataset.transforms as C
from mindspore.dataset.vision import Inter
from mindspore.common import dtype as mstype
def create_dataset(data_path, batch_size=32, repeat_size=1,
                   num_parallel_workers=1):
    """
    Build the MNIST pipeline used for training or testing.

    Labels are cast to int32. Images are resized to 32x32, rescaled by 1/255, rescaled again as
    x / 0.3081 - 0.1307 / 0.3081, and converted from HWC to CHW. The result is shuffled, batched
    (dropping the remainder) and repeated.

    Args:
        data_path (str): directory of the MNIST split to load.
        batch_size (int): batch size. Default: 32.
        repeat_size (int): number of dataset repeats. Default: 1.
        num_parallel_workers (int): workers used by every map operation. Default: 1.

    Returns:
        The configured dataset object.
    """
    dataset = ds.MnistDataset(data_path)

    target_size = (32, 32)
    unit_scale, unit_shift = 1.0 / 255.0, 0.0
    norm_scale = 1 / 0.3081
    norm_shift = -1 * 0.1307 / 0.3081

    # Image operations in the order they are applied.
    image_ops = (
        CV.Resize(target_size, interpolation=Inter.LINEAR),  # Bilinear mode
        CV.Rescale(unit_scale, unit_shift),
        CV.Rescale(norm_scale, norm_shift),
        CV.HWC2CHW(),
    )
    label_cast = C.TypeCast(mstype.int32)

    dataset = dataset.map(operations=label_cast, input_columns="label", num_parallel_workers=num_parallel_workers)
    for image_op in image_ops:
        dataset = dataset.map(operations=image_op, input_columns="image",
                              num_parallel_workers=num_parallel_workers)

    # Shuffle with a 10000-sample buffer, as in the LeNet train script.
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset.repeat(repeat_size)

View File

@ -1,58 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""LeNet."""
import mindspore.nn as nn
class LeNet5(nn.Cell):
    """
    LeNet-5 built from the fused `Conv2dBnAct` / `DenseBnAct` cells.

    Args:
        num_class (int): Num classes. Default: 10.
        channel (int): Number of input channels. Default: 1.

    Returns:
        Tensor, output tensor

    Examples:
        >>> LeNet5(num_class=10)
    """

    def __init__(self, num_class=10, channel=1):
        super().__init__()
        self.type = "fusion"
        self.num_class = num_class

        # `nn.Conv2dBnAct` is used in place of `nn.Conv2d`.
        self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
        self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
        # `nn.DenseBnAct` is used in place of `nn.Dense`.
        self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
        self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
        self.fc3 = nn.DenseBnAct(84, self.num_class)

        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

    def construct(self, x):
        """Two conv + max-pool stages, flatten, then three dense layers."""
        features = self.max_pool2d(self.conv1(x))
        features = self.max_pool2d(self.conv2(features))
        features = self.flatten(features)
        hidden = self.fc2(self.fc1(features))
        return self.fc3(hidden)

View File

@ -1,199 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
train and infer lenet quantization network
"""
import os
import pytest
from mindspore import context
from mindspore import Tensor
from mindspore.common import dtype as mstype
import mindspore.nn as nn
from mindspore.train.metrics import Accuracy
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
from mindspore import load_checkpoint, load_param_into_net, export
from mindspore.train import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore.compression.quant.quantizer import OptimizeOption
from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
from dataset import create_dataset
from config import quant_cfg
from lenet_fusion import LeNet5 as LeNet5Fusion
import numpy as np
data_path = "/home/workspace/mindspore_dataset/mnist"
lenet_ckpt_path = "/home/workspace/mindspore_dataset/checkpoint/lenet/ckpt_lenet_noquant-10_1875.ckpt"
def train_lenet_quant(optim_option="QAT"):
    """
    Train the quantization aware LeNet5 starting from the fp32 checkpoint.

    Args:
        optim_option (str): "LEARNED_SCALE" selects the learned scale quantizer settings; any other
            value uses the default quantization aware training settings. Default: "QAT".
    """
    cfg = quant_cfg
    train_set = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1)
    steps_per_epoch = train_set.get_dataset_size()

    # Build the fusion network and load the non-quantized checkpoint into it.
    network = LeNet5Fusion(cfg.num_classes)
    fp32_params = load_checkpoint(lenet_ckpt_path)
    load_nonquant_param_into_quant_net(network, fp32_params)

    # Convert the fusion network to a quantization aware network.
    if optim_option == "LEARNED_SCALE":
        quantizer_kwargs = dict(bn_fold=False,
                                per_channel=[True, False],
                                symmetric=[True, True],
                                narrow_range=[True, True],
                                freeze_bn=0,
                                quant_delay=0,
                                one_conv_fold=True,
                                optimize_option=OptimizeOption.LEARNED_SCALE)
    else:
        quantizer_kwargs = dict(quant_delay=900,
                                bn_fold=False,
                                per_channel=[True, False],
                                symmetric=[True, False])
    network = QuantizationAwareTraining(**quantizer_kwargs).quantize(network)

    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    optimizer = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

    # A checkpoint is written once every epoch_size * steps_per_epoch steps.
    ckpt_config = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * steps_per_epoch,
                                   keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant"+optim_option, config=ckpt_config)

    model = Model(network, loss_fn, optimizer, metrics={"Accuracy": Accuracy()})

    print("============== Starting Training ==============")
    model.train(cfg['epoch_size'], train_set, callbacks=[ckpt_callback, LossMonitor()],
                dataset_sink_mode=True)
    print("============== End Training ==============")
def eval_quant(optim_option="QAT"):
    """
    Evaluate the trained quantization aware LeNet5 and assert its accuracy exceeds 0.98.

    The checkpoint is read from './ckpt_lenet_quant<optim_option>-10_937.ckpt'.

    Args:
        optim_option (str): "LEARNED_SCALE" selects the learned scale quantizer settings; any other
            value uses the default quantization aware training settings. Default: "QAT".

    Raises:
        ValueError: If `load_param_into_net` reports parameters that were not loaded.
    """
    cfg = quant_cfg
    eval_set = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1)
    trained_ckpt = './ckpt_lenet_quant'+optim_option+'-10_937.ckpt'

    network = LeNet5Fusion(cfg.num_classes)

    # Convert the fusion network to a quantization aware network.
    if optim_option == "LEARNED_SCALE":
        quantizer_kwargs = dict(bn_fold=False,
                                per_channel=[True, False],
                                symmetric=[True, True],
                                narrow_range=[True, True],
                                freeze_bn=0,
                                quant_delay=0,
                                one_conv_fold=True,
                                optimize_option=OptimizeOption.LEARNED_SCALE)
    else:
        quantizer_kwargs = dict(quant_delay=0,
                                bn_fold=False,
                                freeze_bn=10000,
                                per_channel=[True, False],
                                symmetric=[True, False])
    network = QuantizationAwareTraining(**quantizer_kwargs).quantize(network)

    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    optimizer = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
    model = Model(network, loss_fn, optimizer, metrics={"Accuracy": Accuracy()})

    # Load the quantization aware checkpoint; anything left unloaded is an error.
    unloaded = load_param_into_net(network, load_checkpoint(trained_ckpt))
    if unloaded:
        raise ValueError("Load param into net fail!")

    print("============== Starting Testing ==============")
    acc = model.eval(eval_set, dataset_sink_mode=True)
    print("============== {} ==============".format(acc))
    assert acc['Accuracy'] > 0.98
def export_lenet(optim_option="QAT", file_format="MINDIR"):
    """
    Quantize a fresh LeNet5 and export it as "lenet_quant" with quant_mode 'AUTO'.

    Args:
        optim_option (str): "LEARNED_SCALE" selects the learned scale quantizer settings; any other
            value uses the default quantization aware training settings. Default: "QAT".
        file_format (str): format handed to `export`. Default: "MINDIR".
    """
    cfg = quant_cfg
    network = LeNet5Fusion(cfg.num_classes)

    # Convert the fusion network to a quantization aware network.
    if optim_option == "LEARNED_SCALE":
        quantizer_kwargs = dict(bn_fold=False,
                                per_channel=[True, False],
                                symmetric=[True, True],
                                narrow_range=[True, True],
                                freeze_bn=0,
                                quant_delay=0,
                                one_conv_fold=True,
                                optimize_option=OptimizeOption.LEARNED_SCALE)
    else:
        quantizer_kwargs = dict(quant_delay=0,
                                bn_fold=False,
                                freeze_bn=10000,
                                per_channel=[True, False],
                                symmetric=[True, False])
    network = QuantizationAwareTraining(**quantizer_kwargs).quantize(network)

    # Dummy all-ones input of shape (1, 1, image_height, image_width).
    dummy_input = Tensor(np.ones([1, 1, cfg.image_height, cfg.image_width]), mstype.float32)
    export(network, dummy_input, file_name="lenet_quant", file_format=file_format, quant_mode='AUTO')
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lenet_quant():
    """
    Feature: LeNet quantization on GPU
    Description: train, evaluate and export in graph mode, first with the default option and then with
        LEARNED_SCALE
    Expectation: all three stages finish and the accuracy assertion in eval_quant holds
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    # Default option first: each helper's own default is "QAT".
    train_lenet_quant()
    eval_quant()
    export_lenet()
    for stage in (train_lenet_quant, eval_quant, export_lenet):
        stage(optim_option="LEARNED_SCALE")
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_lenet_quant_ascend():
    """
    Feature: LeNet quantization on Ascend
    Description: train, evaluate and export (AIR format) with LEARNED_SCALE in graph mode
    Expectation: all three stages finish and the accuracy assertion in eval_quant holds
    """
    option = "LEARNED_SCALE"
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    train_lenet_quant(optim_option=option)
    eval_quant(optim_option=option)
    export_lenet(optim_option=option, file_format="AIR")
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_lenet_quant_ascend_pynative():
    """
    Feature: LeNet quantization on Ascend in PyNative mode
    Description: run quantization aware training with the QAT option in PyNative mode
    Expectation: training finishes without raising
    """
    run_mode = context.PYNATIVE_MODE
    context.set_context(mode=run_mode, device_target="Ascend")
    train_lenet_quant(optim_option="QAT")

View File

@ -1,67 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" create train dataset. """
from functools import partial
import mindspore.dataset as ds
import mindspore.common.dtype as mstype
import mindspore.dataset.vision as C
import mindspore.dataset.transforms as C2
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    Create a CIFAR-10 train dataset.

    Labels are cast to int32. Images are resized to the configured size, rescaled by 1/255,
    normalized per channel and converted from HWC to CHW; the result is batched (dropping the
    remainder) and repeated.

    Args:
        dataset_path (string): the path of dataset.
        config (EasyDict): the basic config for training; `image_height` and `image_width` are read.
        repeat_num (int): the repeat times of dataset. Default: 1.
        batch_size (int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    make_cifar = partial(ds.Cifar10Dataset, dataset_path)
    dataset = make_cifar(num_parallel_workers=8, shuffle=False)

    target_size = (config.image_height, config.image_width)

    # Image operations in application order; Resize uses its default interpolation.
    image_pipeline = [
        C.Resize(target_size),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        C.HWC2CHW(),
    ]
    label_cast = C2.TypeCast(mstype.int32)

    dataset = dataset.map(input_columns="label", operations=label_cast)
    dataset = dataset.map(input_columns="image", operations=image_pipeline)

    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset.repeat(repeat_num)

View File

@ -1,56 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import math
import numpy as np
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Generate the learning rate array: linear warmup followed by cosine decay.

    Args:
        global_step (int): step to resume from; rates before it are dropped from the result
        lr_init (float): init learning rate
        lr_end (float): end learning rate
        lr_max (float): max learning rate
        warmup_epochs (int): number of warmup epochs
        total_epochs (int): total epoch of training
        steps_per_epoch (int): steps of one epoch

    Returns:
        np.array, float32 learning rates for steps `global_step` .. `total_steps - 1`
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _rate_at(step):
        """Learning rate of a single step, clamped so it is never negative."""
        if step < warmup_steps:
            rate = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            angle = math.pi * (step - warmup_steps) / (total_steps - warmup_steps)
            rate = lr_end + (lr_max - lr_end) * (1. + math.cos(angle)) / 2.
        return 0.0 if rate < 0.0 else rate

    schedule = np.array([_rate_at(step) for step in range(total_steps)]).astype(np.float32)
    return schedule[global_step:]

View File

@ -1,263 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 Quant model define"""
import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore import Tensor
__all__ = ['mobilenetV2']
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class GlobalAvgPooling(nn.Cell):
    """
    Global average pooling layer.

    Reduces the input with a mean over axes 2 and 3; the reduced axes are dropped.

    Args:

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super().__init__()
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        return self.mean(x, (2, 3))
class ConvBNReLU(nn.Cell):
    """
    Convolution (regular or depthwise) with batchnorm and ReLU, built on nn.Conv2dBnAct.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Kernel size. Default: 3.
        stride (int): Stride of the convolution. Default: 1.
        groups (int): Channel group; 1 for a regular convolution, the input channel
            count for a depthwise one. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super().__init__()
        # explicit padding of half the kernel size, rounded down
        half_kernel = (kernel_size - 1) // 2
        self.conv = nn.Conv2dBnAct(
            in_planes, out_planes, kernel_size,
            stride=stride,
            pad_mode='pad',
            padding=half_kernel,
            group=groups,
            has_bn=True,
            activation='relu',
        )

    def construct(self, x):
        return self.conv(x)
class InvertedResidual(nn.Cell):
    """
    MobileNetV2 inverted residual block.

    Optional 1x1 expansion, a depthwise convolution, then a 1x1 projection without
    activation. The input is added back when the stride is 1 and the channel count
    is unchanged.

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride of the depthwise convolution, 1 or 2.
        expand_ratio (int): Expansion ratio applied to the input channel.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> InvertedResidual(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = stride == 1 and inp == oup

        # pw: the expansion is skipped when it would not change the width
        expand = [ConvBNReLU(inp, hidden_dim, kernel_size=1)] if expand_ratio != 1 else []
        # dw
        depthwise = ConvBNReLU(hidden_dim, hidden_dim,
                               stride=stride, groups=hidden_dim)
        # pw-linear
        project = nn.Conv2dBnAct(hidden_dim, oup, kernel_size=1, stride=1,
                                 pad_mode='pad', padding=0, group=1, has_bn=True)

        self.conv = nn.SequentialCell(expand + [depthwise, project])
        self.add = P.Add()

    def construct(self, x):
        out = self.conv(x)
        if self.use_res_connect:
            return self.add(out, x)
        return out
class mobilenetV2(nn.Cell):
    """
    mobilenetV2 fusion architecture.

    Args:
        num_classes (int): Number of classes. Default: 1000.
        width_mult (float): Channel multiplier applied before rounding. Default: 1.
        has_dropout (bool): Whether a dropout layer is placed before the classifier.
            Default: False.
        inverted_residual_setting (list): Rows of [t, c, n, s] describing the inverted
            residual stages. Default: None, which selects the built-in table.
        round_nearest (int): Channel counts are rounded to a multiple of this. Default: 8.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> mobilenetV2(num_classes=1000)
    """

    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super().__init__()
        block = InvertedResidual

        # setting of inverted residual blocks
        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
        self.cfgs = inverted_residual_setting

        # building first layer
        stem_channel = _make_divisible(32 * width_mult, round_nearest)
        self.out_channels = _make_divisible(
            1280 * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(3, stem_channel, stride=2)]

        # building inverted residual blocks
        current_channel = stem_channel
        for t, c, n, s in self.cfgs:
            stage_channel = _make_divisible(c * width_mult, round_nearest)
            for repeat in range(n):
                # only the first block of a stage uses the configured stride
                block_stride = s if repeat == 0 else 1
                features.append(
                    block(current_channel, stage_channel, block_stride, expand_ratio=t))
                current_channel = stage_channel

        # building last several layers
        features.append(ConvBNReLU(
            current_channel, self.out_channels, kernel_size=1))
        self.features = nn.SequentialCell(features)

        # mobilenet head
        head = [GlobalAvgPooling()]
        if has_dropout:
            head.append(nn.Dropout(0.2))
        head.append(nn.DenseBnAct(self.out_channels, num_classes,
                                  has_bias=True, has_bn=False))
        self.head = nn.SequentialCell(head)

        # init weights
        self.init_parameters_data()
        self._initialize_weights()

    def construct(self, x):
        return self.head(self.features(x))

    def _initialize_weights(self):
        """
        Initialize weights.

        Convolution weights are drawn from a normal distribution whose standard
        deviation is sqrt(2 / (kernel_h * kernel_w * out_channels)), dense weights
        from a normal distribution with standard deviation 0.01, biases and
        batchnorm beta are zeroed and batchnorm gamma is set to one.

        Args:

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """

        def zeros_like(param):
            return Tensor(np.zeros(param.data.shape, dtype="float32"))

        def init_conv(conv):
            fan = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
            conv.weight.set_data(Tensor(np.random.normal(
                0, np.sqrt(2. / fan), conv.weight.data.shape).astype("float32")))
            if conv.bias is not None:
                conv.bias.set_data(zeros_like(conv.bias))

        def init_dense(dense):
            dense.weight.set_data(Tensor(np.random.normal(
                0, 0.01, dense.weight.data.shape).astype("float32")))
            if dense.bias is not None:
                dense.bias.set_data(zeros_like(dense.bias))

        self.init_parameters_data()
        for _, cell in self.cells_and_names():
            # the generator is reseeded for every visited cell, so each draw
            # starts from the same random state
            np.random.seed(1)
            if isinstance(cell, nn.Conv2d):
                init_conv(cell)
            elif isinstance(cell, nn.Conv2dBnAct):
                init_conv(cell.conv)
            elif isinstance(cell, nn.BatchNorm2d):
                cell.gamma.set_data(
                    Tensor(np.ones(cell.gamma.data.shape, dtype="float32")))
                cell.beta.set_data(zeros_like(cell.beta))
            elif isinstance(cell, nn.Dense):
                init_dense(cell)
            elif isinstance(cell, nn.DenseBnAct):
                init_dense(cell.dense)

View File

@ -1,136 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Mobilenetv2_quant on Cifar10"""
import pytest
import numpy as np
from easydict import EasyDict as ed
from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore.common import set_seed
from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropyWithLabelSmooth
from mobilenetV2 import mobilenetV2
# Settings for the MobileNetV2 quantization-aware training test on Ascend.
# Keys the code in this file reads: num_classes, batch_size, step_threshold, epoch_size,
# start_epoch, warmup_epochs, lr, momentum, weight_decay and label_smooth; the whole
# object is also handed to create_dataset. The remaining keys are not read by the
# training code in this file.
config_ascend_quant = ed({
"num_classes": 10,
"image_height": 224,
"image_width": 224,
"batch_size": 200,
# global step at which Monitor is told to stop training
"step_threshold": 10,
"data_load_mode": "mindata",
"epoch_size": 1,
# multiplied by the dataset size to get the step at which the lr schedule starts
"start_epoch": 200,
"warmup_epochs": 1,
# passed to get_lr as lr_max
"lr": 0.3,
"momentum": 0.9,
"weight_decay": 4e-5,
# a value > 0 selects CrossEntropyWithLabelSmooth as the loss
"label_smooth": 0.1,
"loss_scale": 1024,
"save_checkpoint": True,
"save_checkpoint_epochs": 1,
"keep_checkpoint_max": 300,
"save_checkpoint_path": "./checkpoint",
})
# Dataset directory passed to create_dataset; presumably the CIFAR-10 binary files
# on the test machine - TODO confirm.
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
def train():
    """
    Run a short quantization-aware training job for MobileNetV2 and check the
    fastest step time and the average step loss against fixed limits.
    """
    cfg = config_ascend_quant
    print("training configure: {}".format(cfg))
    num_epochs = cfg.epoch_size

    # define network
    net = mobilenetV2(num_classes=cfg.num_classes)

    # define loss
    if cfg.label_smooth > 0:
        criterion = CrossEntropyWithLabelSmooth(
            smooth_factor=cfg.label_smooth, num_classes=cfg.num_classes)
    else:
        criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # define dataset
    train_set = create_dataset(dataset_path=dataset_path,
                               config=cfg,
                               repeat_num=1,
                               batch_size=cfg.batch_size)
    steps_per_epoch = train_set.get_dataset_size()

    # convert fusion network to quantization aware network
    net = QuantizationAwareTraining(bn_fold=True,
                                    per_channel=[True, False],
                                    symmetric=[True, False]).quantize(net)

    # get learning rate: the schedule is cut so that it starts at epoch `start_epoch`
    lr_schedule = Tensor(get_lr(global_step=cfg.start_epoch * steps_per_epoch,
                                lr_init=0,
                                lr_end=0,
                                lr_max=cfg.lr,
                                warmup_epochs=cfg.warmup_epochs,
                                total_epochs=num_epochs + cfg.start_epoch,
                                steps_per_epoch=steps_per_epoch))

    # define optimization
    trainable_params = filter(lambda param: param.requires_grad, net.get_parameters())
    optimizer = nn.Momentum(trainable_params, lr_schedule, cfg.momentum,
                            cfg.weight_decay)

    # define model
    model = Model(net, loss_fn=criterion, optimizer=optimizer)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr_schedule.asnumpy(),
                      step_threshold=cfg.step_threshold)
    model.train(num_epochs, train_set, callbacks=[monitor],
                dataset_sink_mode=False)
    print("============== End Training ==============")

    # monitor.step_mseconds holds the shortest step time observed, in milliseconds
    step_time_limit = 650
    fastest_step = monitor.step_mseconds
    print('train_time_used:{}'.format(fastest_step))
    assert fastest_step < step_time_limit

    avg_loss_limit = 2.32
    mean_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(mean_step_loss))
    assert mean_step_loss < avg_loss_limit
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    Features: MobileNetV2 quantization-aware training
    Description: seed the framework, select graph mode on Ascend and run train()
    Expectation: the assertions inside train() hold
    """
    set_seed(1)
    context.set_context(device_target="Ascend", mode=context.GRAPH_MODE)
    train()


# allow running the test directly as a script
if __name__ == '__main__':
    test_mobilenetv2_quant()

View File

@ -1,121 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Mobilenetv2_quant gpu on Cifar10"""
import pytest
import numpy as np
from easydict import EasyDict as ed
from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore.common import set_seed
from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropyWithLabelSmooth
from mobilenetV2 import mobilenetV2
# Settings for the MobileNetV2 quantization-aware training test in this file.
# NOTE(review): despite the "ascend" in the name, the test below that reads this
# config sets device_target="GPU".
# Keys the test reads: num_classes, batch_size, step_threshold, epoch_size, start_epoch,
# warmup_epochs, lr, momentum, weight_decay and label_smooth; the whole object is also
# handed to create_dataset. The remaining keys are not read by the test in this file.
config_ascend_quant = ed({
"num_classes": 10,
"image_height": 224,
"image_width": 224,
"batch_size": 300,
# global step at which Monitor is told to stop training
"step_threshold": 10,
"data_load_mode": "mindata",
"epoch_size": 1,
# multiplied by the dataset size to get the step at which the lr schedule starts
"start_epoch": 200,
"warmup_epochs": 1,
# passed to get_lr as lr_max
"lr": 0.05,
"momentum": 0.997,
"weight_decay": 4e-5,
# a value > 0 selects CrossEntropyWithLabelSmooth as the loss
"label_smooth": 0.1,
"loss_scale": 1024,
"save_checkpoint": True,
"save_checkpoint_epochs": 1,
"keep_checkpoint_max": 300,
"save_checkpoint_path": "./checkpoint",
})
# Dataset directory passed to create_dataset; presumably the CIFAR-10 binary files
# on the test machine - TODO confirm.
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
@pytest.mark.level2
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    Features: MobileNetV2 quantization-aware training
    Description: run a short training job in graph mode on GPU
    Expectation: average step loss and fastest step time stay below fixed limits
    """
    set_seed(1)
    context.set_context(device_target="GPU", mode=context.GRAPH_MODE)
    cfg = config_ascend_quant
    print("training configure: {}".format(cfg))
    num_epochs = cfg.epoch_size

    # define network
    net = mobilenetV2(num_classes=cfg.num_classes)

    # define loss
    if cfg.label_smooth > 0:
        criterion = CrossEntropyWithLabelSmooth(
            smooth_factor=cfg.label_smooth, num_classes=cfg.num_classes)
    else:
        criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # define dataset
    train_set = create_dataset(dataset_path=dataset_path,
                               config=cfg,
                               repeat_num=1,
                               batch_size=cfg.batch_size)
    steps_per_epoch = train_set.get_dataset_size()

    # convert fusion network to quantization aware network
    net = QuantizationAwareTraining(bn_fold=True,
                                    per_channel=[True, False],
                                    symmetric=[False, False]).quantize(net)

    # get learning rate: the schedule is cut so that it starts at epoch `start_epoch`
    lr_schedule = Tensor(get_lr(global_step=cfg.start_epoch * steps_per_epoch,
                                lr_init=0,
                                lr_end=0,
                                lr_max=cfg.lr,
                                warmup_epochs=cfg.warmup_epochs,
                                total_epochs=num_epochs + cfg.start_epoch,
                                steps_per_epoch=steps_per_epoch))

    # define optimization
    trainable_params = filter(lambda param: param.requires_grad, net.get_parameters())
    optimizer = nn.Momentum(trainable_params, lr_schedule, cfg.momentum,
                            cfg.weight_decay)

    # define model
    model = Model(net, loss_fn=criterion, optimizer=optimizer)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr_schedule.asnumpy(),
                      step_threshold=cfg.step_threshold)
    model.train(num_epochs, train_set, callbacks=[monitor],
                dataset_sink_mode=False)
    print("============== End Training ==============")

    # monitor.step_mseconds holds the shortest step time observed, in milliseconds
    fastest_step = monitor.step_mseconds
    print('train_time_used:{}'.format(fastest_step))
    mean_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(mean_step_loss))

    avg_loss_limit = 2.32
    assert mean_step_loss < avg_loss_limit
    step_time_limit = 960
    assert fastest_step < step_time_limit


# allow running the test directly as a script
if __name__ == '__main__':
    test_mobilenetv2_quant()

View File

@ -1,120 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 utils"""
import time
import numpy as np
from mindspore.train.callback import Callback
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.loss.loss import LossBase
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype
class Monitor(Callback):
    """
    Monitor loss and time.

    Records the loss of every step, tracks the shortest step time and asks the
    training loop to stop once a given global step is reached.

    Args:
        lr_init (numpy array): per-step learning rates, used only for logging.
            Default: None, in which case the learning rate is logged as nan.
        step_threshold (int): global step at which training is requested to stop.
            Default: 10.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None, step_threshold=10):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # the default lr_init=None must not crash on len(None)
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        self.step_threshold = step_threshold
        # shortest step time seen so far in milliseconds; starts at a large sentinel
        self.step_mseconds = 50000
        # created here so step_end also works if epoch_begin has not run yet
        self.losses = []

    def epoch_begin(self, run_context):
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))
        self.epoch_mseconds = epoch_mseconds

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        self.step_mseconds = min(self.step_mseconds, step_mseconds)

        # reduce the network output to a single scalar loss
        step_loss = cb_params.net_outputs
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())
        self.losses.append(step_loss)

        # the learning rate is only logged; a missing or too short schedule
        # is reported as nan instead of raising
        lr_index = cb_params.cur_step_num - 1
        cur_lr = self.lr_init[lr_index] if lr_index < self.lr_init_len else float("nan")

        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
            1, cb_params.batch_num, step_loss,
            np.mean(self.losses), self.step_mseconds, cur_lr))

        if cb_params.cur_step_num == self.step_threshold:
            run_context.request_stop()
class CrossEntropyWithLabelSmooth(LossBase):
    """
    Softmax cross entropy computed against label-smoothed one-hot targets.

    The target class gets `1 - smooth_factor`; every other class gets
    `smooth_factor / (num_classes - 1)`.

    Args:
        smooth_factor (float): smooth factor, default=0.
        num_classes (int): num classes

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super().__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        int_label = self.cast(label, mstype.int32)
        # the one-hot depth is taken from axis 1 of the logits
        depth = F.shape(logit)[1]
        smoothed_label = self.onehot(int_label, depth, self.on_value, self.off_value)
        return self.mean(self.ce(logit, smoothed_label), 0)

View File

@ -22,20 +22,18 @@ from mindspore import nn
from mindspore import context from mindspore import context
from mindspore import Tensor from mindspore import Tensor
from mindspore.common import set_seed from mindspore.common import set_seed
from mindspore.compression.quant import create_quant_config
class Net(nn.Cell): class Net(nn.Cell):
def __init__(self, qconfig): def __init__(self):
super(Net, self).__init__() super(Net, self).__init__()
self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), pad_mode='valid')
pad_mode='valid', quant_config=qconfig)
def construct(self, x): def construct(self, x):
return self.conv(x) return self.conv(x)
def test_conv2d_bn_fold_quant(): def test_conv2d_bn_fold_quant():
set_seed(1) set_seed(1)
quant_config = create_quant_config() network = Net()
network = Net(quant_config)
inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32)) inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32))
label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32)) label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32))
opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9) opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9)
@ -44,11 +42,13 @@ def test_conv2d_bn_fold_quant():
train_network = nn.TrainOneStepCell(net_with_loss, opt) train_network = nn.TrainOneStepCell(net_with_loss, opt)
train_network.set_train() train_network.set_train()
out_loss = train_network(inputs, label) out_loss = train_network(inputs, label)
print("------------------", out_loss.asnumpy())
expect_loss = np.array([0.940427]) expect_loss = np.array([0.940427])
error = np.array([0.1]) error = np.array([0.1])
diff = out_loss.asnumpy() - expect_loss diff = out_loss.asnumpy() - expect_loss
assert np.all(abs(diff) < error) assert np.all(abs(diff) < error)
@pytest.mark.level1 @pytest.mark.level1
@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training @pytest.mark.platform_x86_ascend_training

View File

@ -1,67 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" create train dataset. """
from functools import partial
import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms as C2
import mindspore.dataset.vision as C
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    Create the CIFAR-10 training dataset pipeline.

    Images are resized to the configured size, normalized and converted from HWC
    to CHW; labels are cast to int32. The data is read unshuffled, batched with the
    remainder dropped, and repeated.

    Args:
        dataset_path(string): the path of dataset.
        config(EasyDict): the basic config for training; `image_height` and
            `image_width` are read from it.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=False)

    # per-channel statistics scaled to the 0-255 pixel range
    mean = [channel * 255 for channel in (0.485, 0.456, 0.406)]
    std = [channel * 255 for channel in (0.229, 0.224, 0.225)]

    # define map operations
    image_ops = [
        C.Resize((config.image_height, config.image_width)),
        C.Normalize(mean=mean, std=std),
        C.HWC2CHW(),
    ]
    label_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=image_ops, input_columns="image",
                            num_parallel_workers=8)
    data_set = data_set.map(operations=label_op,
                            input_columns="label", num_parallel_workers=8)

    # apply batch operations, then the dataset repeat operation
    data_set = data_set.batch(batch_size, drop_remainder=True)
    return data_set.repeat(repeat_num)

View File

@ -1,93 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import math
import numpy as np
def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """
    Generate the learning rate for every training step.

    Args:
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch
        lr_decay_mode(string): learning rate decay mode, one of 'steps', 'poly' or
            'cosine'; any other value selects linear warmup followed by linear decay

    Returns:
        np.array, float32 learning rate array with one entry per step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    if lr_decay_mode == 'steps':
        # piecewise constant: drop by 10x at 30%, 60% and 80% of training
        first_drop, second_drop, third_drop = \
            0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps

        def rate_at(step):
            if step < first_drop:
                return lr_max
            if step < second_drop:
                return lr_max * 0.1
            if step < third_drop:
                return lr_max * 0.01
            return lr_max * 0.001

    elif lr_decay_mode == 'poly':
        # linear warmup, then quadratic decay from lr_max, clamped at zero
        inc_each_step = ((float(lr_max) - float(lr_init)) / float(warmup_steps)
                         if warmup_steps != 0 else 0)

        def rate_at(step):
            if step < warmup_steps:
                rate = float(lr_init) + inc_each_step * float(step)
            else:
                base = (1.0 - (float(step) - float(warmup_steps)) /
                        (float(total_steps) - float(warmup_steps)))
                rate = float(lr_max) * base * base
            return 0.0 if rate < 0.0 else rate

    elif lr_decay_mode == 'cosine':
        decay_steps = total_steps - warmup_steps

        def rate_at(step):
            if step < warmup_steps:
                # this warmup reaches lr_max on its last step (note the step + 1)
                lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
                return float(lr_init) + lr_inc * (step + 1)
            linear_decay = (total_steps - step) / decay_steps
            cosine_decay = 0.5 * \
                (1 + math.cos(math.pi * 2 * 0.47 * step / decay_steps))
            return lr_max * (linear_decay * cosine_decay + 0.00001)

    else:
        # linear warmup followed by linear decay from lr_max to lr_end
        def rate_at(step):
            if step < warmup_steps:
                return lr_init + (lr_max - lr_init) * step / warmup_steps
            return lr_max - (lr_max - lr_end) * \
                (step - warmup_steps) / (total_steps - warmup_steps)

    return np.array([rate_at(step) for step in range(total_steps)]).astype(np.float32)

View File

@ -1,346 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ResNet."""
import numpy as np
import mindspore.nn as nn
import mindspore.common.initializer as weight_init
from mindspore.ops import operations as P
from mindspore import Tensor
from mindspore.nn import FakeQuantWithMinMaxObserver, Conv2dBnFoldQuant
from mindspore.compression.quant import create_quant_config
# Module-level quantization settings shared by every layer built in this file.
# Passed as `ema_decay` to the FakeQuantWithMinMaxObserver cells.
_ema_decay = 0.999
# First element of the `symmetric` pair given to create_quant_config below.
_symmetric = True
# When True, conv blocks are followed by fake-quant cells (nn.ActQuant /
# FakeQuantWithMinMaxObserver); also forwarded as `fake=` to Conv2dBnFoldQuant.
_fake = True
# First element of the `per_channel` pair given to create_quant_config below.
_per_channel = True
# Built once at import time. NOTE(review): the two-element tuples presumably mean
# (weights, activations) - confirm against create_quant_config.
_quant_config = create_quant_config(per_channel=(_per_channel, False), symmetric=(_symmetric, False))
def _weight_variable(shape, factor=0.01):
    """Return a float32 Tensor of the given shape filled with standard normal samples scaled by `factor`."""
    return Tensor(np.random.randn(*shape).astype(np.float32) * factor)
def _conv3x3(in_channel, out_channel, stride=1):
    """Build a 3x3 nn.Conv2d with 'same' padding and randomly initialized weights."""
    kernel = _weight_variable((out_channel, in_channel, 3, 3))
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=kernel)
def _conv1x1(in_channel, out_channel, stride=1):
    """Build a 1x1 nn.Conv2d with 'same' padding and randomly initialized weights."""
    kernel = _weight_variable((out_channel, in_channel, 1, 1))
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=kernel)
def _conv7x7(in_channel, out_channel, stride=1):
    """Build a 7x7 nn.Conv2d with 'same' padding and randomly initialized weights."""
    kernel = _weight_variable((out_channel, in_channel, 7, 7))
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=kernel)
def _bn(channel):
    """Build a BatchNorm2d layer whose gamma is initialized to 1."""
    bn_kwargs = dict(eps=1e-4, momentum=0.9,
                     gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
    return nn.BatchNorm2d(channel, **bn_kwargs)
def _bn_last(channel):
    """Build a BatchNorm2d layer whose gamma is initialized to 0."""
    bn_kwargs = dict(eps=1e-4, momentum=0.9,
                     gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)
    return nn.BatchNorm2d(channel, **bn_kwargs)
def _fc(in_channel, out_channel):
    """Build a biased nn.Dense layer with randomly initialized weights and a zero bias."""
    dense_weight = _weight_variable((out_channel, in_channel))
    return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=dense_weight, bias_init=0)
class ConvBNReLU(nn.Cell):
    """
    Quantization-aware convolution block: Conv2dBnFoldQuant followed by ReLU.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Kernel size. Default: 3.
        stride (int): Stride of the convolution. Default: 1.
        groups (int): Channel group; 1 for a regular convolution, the input channel
            count for a depthwise one. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super().__init__()
        # explicit padding of half the kernel size, rounded down
        half_kernel = (kernel_size - 1) // 2
        conv = Conv2dBnFoldQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=half_kernel,
                                 group=groups, fake=_fake, quant_config=_quant_config)
        # with fake quantization enabled the activation is wrapped in nn.ActQuant
        activation = nn.ActQuant(nn.ReLU()) if _fake else nn.ReLU()
        self.features = nn.SequentialCell([conv, activation])

    def construct(self, x):
        return self.features(x)
class ResidualBlock(nn.Cell):
    """
    Quantization-aware ResNet V1 bottleneck block.

    Three convolutions (1x1, 3x3, 1x1) are applied to the input; the shortcut is
    projected by a 1x1 convolution when the stride is not 1 or the channel count
    changes. Both paths are summed and passed through ReLU.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride of the 3x3 convolution and of the shortcut projection.
            Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(3, 256, stride=2)
    """
    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super().__init__()

        bottleneck = out_channel // self.expansion
        self.conv1 = ConvBNReLU(in_channel, bottleneck, kernel_size=1, stride=1)
        self.conv2 = ConvBNReLU(bottleneck, bottleneck, kernel_size=3, stride=stride)

        # last 1x1 convolution; followed by a fake-quant observer when _fake is set
        if _fake:
            self.conv3 = nn.SequentialCell([
                Conv2dBnFoldQuant(bottleneck, out_channel, fake=_fake,
                                  quant_config=_quant_config,
                                  kernel_size=1, stride=1, pad_mode='same', padding=0),
                FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, symmetric=False),
            ])
        else:
            self.conv3 = Conv2dBnFoldQuant(bottleneck, out_channel, fake=_fake,
                                           quant_config=_quant_config,
                                           kernel_size=1, stride=1,
                                           pad_mode='same', padding=0)

        # the shortcut needs a projection whenever the shape changes
        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True

        self.down_sample_layer = None
        if self.down_sample:
            if _fake:
                self.down_sample_layer = nn.SequentialCell([
                    Conv2dBnFoldQuant(in_channel, out_channel,
                                      quant_config=_quant_config,
                                      kernel_size=1, stride=stride,
                                      pad_mode='same', padding=0),
                    FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay,
                                                symmetric=False),
                ])
            else:
                self.down_sample_layer = Conv2dBnFoldQuant(in_channel, out_channel,
                                                           fake=_fake,
                                                           quant_config=_quant_config,
                                                           kernel_size=1,
                                                           stride=stride,
                                                           pad_mode='same',
                                                           padding=0)

        self.add = nn.TensorAddQuant()
        self.relu = P.ReLU()

    def construct(self, x):
        shortcut = x

        out = self.conv3(self.conv2(self.conv1(x)))

        if self.down_sample:
            shortcut = self.down_sample_layer(shortcut)

        out = self.add(out, shortcut)
        return self.relu(out)
class ResNet(nn.Cell):
    """
    Quantization-aware ResNet architecture.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        strides (list): Stride size in each layer.
        num_classes (int): The number of classes that the training images are belonging to.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResNet(ResidualBlock,
        >>>        [3, 4, 6, 3],
        >>>        [64, 256, 512, 1024],
        >>>        [256, 512, 1024, 2048],
        >>>        [1, 2, 2, 2],
        >>>        10)
    """

    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super().__init__()

        if any(len(spec) != 4 for spec in (layer_nums, in_channels, out_channels)):
            raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")

        self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        # the four residual stages, built from the per-stage entries of the argument lists
        stages = [self._make_layer(block,
                                   layer_nums[idx],
                                   in_channel=in_channels[idx],
                                   out_channel=out_channels[idx],
                                   stride=strides[idx])
                  for idx in range(4)]
        self.layer1, self.layer2, self.layer3, self.layer4 = stages

        self.mean = P.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, quant_config=_quant_config)
        self.output_fake = nn.FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay)

        # init weights
        self._initialize_weights()

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        """
        Make stage network of ResNet.

        The first block changes the channel count and applies `stride`; the
        remaining `layer_num - 1` blocks keep the shape.

        Args:
            block (Cell): Resnet block.
            layer_num (int): Layer number.
            in_channel (int): Input channel.
            out_channel (int): Output channel.
            stride (int): Stride size for the first convolutional layer.

        Returns:
            SequentialCell, the output layer.

        Examples:
            >>> _make_layer(ResidualBlock, 3, 128, 256, 2)
        """
        stage = [block(in_channel, out_channel, stride=stride)]
        stage.extend(block(out_channel, out_channel, stride=1)
                     for _ in range(1, layer_num))
        return nn.SequentialCell(stage)

    def construct(self, x):
        x = self.conv1(x)
        stem = self.maxpool(x)

        stage1 = self.layer1(stem)
        stage2 = self.layer2(stage1)
        stage3 = self.layer3(stage2)
        stage4 = self.layer4(stage3)

        # average over axes 2 and 3, then flatten for the classifier
        pooled = self.mean(stage4, (2, 3))
        logits = self.end_point(self.flatten(pooled))
        return self.output_fake(logits)

    def _initialize_weights(self):
        """Re-initialize the weight of every quantized conv/dense cell with weight_init.Normal()."""
        quant_cells = (nn.Conv2dBnFoldQuant, nn.DenseQuant, nn.Conv2dBnWithoutFoldQuant)

        self.init_parameters_data()
        for _, cell in self.cells_and_names():
            # the numpy generator is reseeded for every visited cell
            np.random.seed(1)
            if isinstance(cell, quant_cells):
                cell.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                             cell.weight.shape,
                                                             cell.weight.dtype))
def resnet50_quant(class_num=10):
    """
    Build the quantization-aware ResNet50 network.

    Args:
        class_num (int): Number of output classes. Default: 10.

    Returns:
        Cell, cell instance of ResNet50 neural network.

    Examples:
        >>> net = resnet50_quant(10)
    """
    stage_depths = [3, 4, 6, 3]
    stage_in_channels = [64, 256, 512, 1024]
    stage_out_channels = [256, 512, 1024, 2048]
    stage_strides = [1, 2, 2, 2]
    return ResNet(block=ResidualBlock,
                  layer_nums=stage_depths,
                  in_channels=stage_in_channels,
                  out_channels=stage_out_channels,
                  strides=stage_strides,
                  num_classes=class_num)
def resnet101_quant(class_num=1001):
    """
    Build the quantization-aware ResNet101 network.

    Args:
        class_num (int): Number of output classes. Default: 1001.

    Returns:
        Cell, cell instance of ResNet101 neural network.

    Examples:
        >>> net = resnet101_quant(1001)
    """
    stage_depths = [3, 4, 23, 3]
    stage_in_channels = [64, 256, 512, 1024]
    stage_out_channels = [256, 512, 1024, 2048]
    stage_strides = [1, 2, 2, 2]
    return ResNet(block=ResidualBlock,
                  layer_nums=stage_depths,
                  in_channels=stage_in_channels,
                  out_channels=stage_out_channels,
                  strides=stage_strides,
                  num_classes=class_num)

View File

@ -1,131 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Resnet50_quant on Cifar10"""
import pytest
import numpy as np
from easydict import EasyDict as ed
from mindspore import context
from mindspore import Tensor
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore import set_seed
from resnet_quant_manual import resnet50_quant
from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropy
# Settings for the ResNet50 quantization-aware training test below.
# test_resnet50_quant reads class_num, batch_size, step_threshold, loss_scale,
# momentum, weight_decay, epoch_size, warmup_epochs, use_label_smooth,
# label_smooth_factor, lr_init and lr_max directly; the whole object is also
# handed to create_dataset(config=...).
# NOTE(review): the remaining keys (pretrained_epoch_size, buffer_size,
# image_height/width, data_load_mode, save_checkpoint*, keep_checkpoint_max,
# lr_decay_mode) are not read in this file - presumably consumed by
# create_dataset or left over from a full training script; verify.
config_quant = ed({
"class_num": 10,
"batch_size": 128,
# Passed to Monitor(step_threshold=...) by the test.
"step_threshold": 20,
"loss_scale": 1024,
"momentum": 0.9,
"weight_decay": 1e-4,
"epoch_size": 1,
"pretrained_epoch_size": 90,
"buffer_size": 1000,
"image_height": 224,
"image_width": 224,
"data_load_mode": "original",
"save_checkpoint": True,
"save_checkpoint_epochs": 1,
"keep_checkpoint_max": 50,
"save_checkpoint_path": "./",
"warmup_epochs": 0,
# The test passes lr_decay_mode='cosine' to get_lr as a literal, not this value.
"lr_decay_mode": "cosine",
"use_label_smooth": True,
"label_smooth_factor": 0.1,
"lr_init": 0,
"lr_max": 0.005,
})
# Dataset directory handed to create_dataset(dataset_path=...).
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_resnet50_quant():
    """
    Train the quantized ResNet50 briefly on Ascend and check the mean step loss.

    The network is converted with QuantizationAwareTraining, trained through
    Model.train with a Monitor callback, and the average of the losses the
    monitor recorded must stay below 2.60.
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print(f"training configure: {config}")
    epoch_size = config.epoch_size

    # Network under test, in training mode.
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # Loss with optional label smoothing; no loss-scale manager is used here.
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)

    # Training data.
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # Convert the fusion network to a quantization aware network.
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    net = quantizer.quantize(net)

    # Per-step learning rate schedule.
    lr_schedule = get_lr(lr_init=config.lr_init,
                         lr_end=0.0,
                         lr_max=config.lr_max,
                         warmup_epochs=config.warmup_epochs,
                         total_epochs=config.epoch_size,
                         steps_per_epoch=step_size,
                         lr_decay_mode='cosine')
    lr = Tensor(lr_schedule)

    # Optimizer over the parameters that require gradients.
    trainable_params = filter(lambda x: x.requires_grad, net.get_parameters())
    opt = Momentum(trainable_params, lr, config.momentum,
                   config.weight_decay, config.loss_scale)

    # Model wrapper without loss scaling or metrics.
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    model.train(epoch_size, dataset, callbacks=[monitor],
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.60
    avg_step_loss = np.mean(np.array(monitor.losses))
    print(f"average step loss:{avg_step_loss}")
    assert avg_step_loss < expect_avg_step_loss
if __name__ == "__main__":
    # Allow running the training test directly, without going through pytest.
    test_resnet50_quant()

View File

@ -1,105 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Resnet50 utils"""
import time
import numpy as np
from mindspore.train.callback import Callback
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.loss.loss import LossBase
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype
class Monitor(Callback):
    """
    Callback that records the loss of every step and prints timing information.

    Args:
        lr_init (numpy array): Learning rate for each global step, looked up
            with ``cur_step_num - 1``. May be None, in which case the printed
            learning rate is ``nan``. Default: None.
        step_threshold (int): Global step number at which the callback asks
            the training loop to stop. Default: 10.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05] * 100).asnumpy(), step_threshold=20)
    """

    def __init__(self, lr_init=None, step_threshold=10):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # The default lr_init=None must not crash the constructor.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        self.step_threshold = step_threshold
        # Created here so the attributes exist even if a *_begin hook has not
        # run yet (e.g. when reading `losses` right after construction).
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def _lr_at(self, index):
        """Return the scheduled learning rate at `index`, or nan if unavailable."""
        if self.lr_init is None:
            return float("nan")
        # Same index range plain indexing accepts; outside it, log nan
        # instead of raising in the middle of training.
        if -self.lr_init_len <= index < self.lr_init_len:
            return self.lr_init[index]
        return float("nan")

    def epoch_begin(self, run_context):
        """Reset the recorded losses and start the epoch timer."""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """Print the epoch duration, mean step duration and mean loss."""
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))
        self.epoch_mseconds = epoch_mseconds

    def step_begin(self, run_context):
        """Start the step timer."""
        self.step_time = time.time()

    def step_end(self, run_context):
        """Record and print the step loss; request a stop at `step_threshold`."""
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000

        # The network output may be a single Tensor or a sequence whose first
        # element is the loss Tensor; reduce it to a scalar mean.
        step_loss = cb_params.net_outputs
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
        cur_lr = self._lr_at(cb_params.cur_step_num - 1)

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
            1, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, cur_lr))

        if cb_params.cur_step_num == self.step_threshold:
            run_context.request_stop()
class CrossEntropy(LossBase):
    """
    Softmax cross entropy on label-smoothed one-hot targets.

    Args:
        smooth_factor (float): Smoothing amount; the target class gets
            ``1 - smooth_factor`` and every other class gets
            ``smooth_factor / (num_classes - 1)``. Default: 0.
        num_classes (int): Number of classes used to spread the smoothing
            mass. Default: 1001.
    """

    def __init__(self, smooth_factor=0, num_classes=1001):
        super(CrossEntropy, self).__init__()
        target_value = 1.0 - smooth_factor
        other_value = 1.0 * smooth_factor / (num_classes - 1)
        self.onehot = P.OneHot()
        self.on_value = Tensor(target_value, mstype.float32)
        self.off_value = Tensor(other_value, mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        """Return the cross entropy between `logit` and smoothed `label`, averaged over axis 0."""
        depth = F.shape(logit)[1]
        smoothed_label = self.onehot(label, depth, self.on_value, self.off_value)
        per_sample_loss = self.ce(logit, smoothed_label)
        return self.mean(per_sample_loss, 0)