forked from mindspore-Ecosystem/mindspore
!47033 remove compression and testcases in mindspore
Merge pull request !47033 from hangq/wood
This commit is contained in:
commit
b6653ab2d6
|
@ -289,7 +289,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
|
||||
|
|
|
@ -164,7 +164,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
|
||||
|
|
|
@ -250,7 +250,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
|
||||
|
|
|
@ -23,9 +23,6 @@ mindspore.export
|
|||
|
||||
- **kwargs** (dict) - 配置选项字典。
|
||||
|
||||
- **quant_mode** (str) - 如果网络是量化感知训练网络,那么 `quant_mode` 需要设置为"QUANT",否则 `quant_mode` 需要设置为"NONQUANT"。
|
||||
- **mean** (float) - 预处理后输入数据的平均值,用于量化网络的第一层。默认值:127.5。
|
||||
- **std_dev** (float) - 预处理后输入数据的标准差,用于量化网络的第一层。默认值:127.5。
|
||||
- **enc_key** (str) - 用于加密的字节类型密钥,有效长度为16、24或者32。
|
||||
- **enc_mode** (Union[str, function]) - 指定加密模式,当设置 `enc_key` 时启用。
|
||||
|
||||
|
|
|
@ -168,8 +168,6 @@ PYBIND11_MODULE(_c_expression, m) {
|
|||
"Get the number of parallel operators.")
|
||||
.def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"),
|
||||
"Get Allreduce Fusion Dictionary.")
|
||||
.def("fetch_info_for_quant_export", &GraphExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"),
|
||||
"Fetch the inputs of Conv or Matmul for quant export.")
|
||||
.def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"),
|
||||
"Build data graph.")
|
||||
.def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"),
|
||||
|
|
|
@ -631,122 +631,6 @@ GraphExecutorPy::~GraphExecutorPy() {
|
|||
ConfigManager::GetInstance().ResetConfig();
|
||||
}
|
||||
|
||||
void GraphExecutorPy::GetWeightInfo(
|
||||
const CNodePtr &root_node, const AnfNodePtr &weight_node,
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const {
|
||||
MS_EXCEPTION_IF_NULL(root_node);
|
||||
MS_EXCEPTION_IF_NULL(fake_quant_table);
|
||||
std::string weight_name;
|
||||
auto x = root_node->input(1);
|
||||
MS_EXCEPTION_IF_NULL(x);
|
||||
if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
|
||||
weight_name = weight_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
|
||||
} else {
|
||||
auto para = weight_node->cast_ptr<Parameter>();
|
||||
MS_EXCEPTION_IF_NULL(para);
|
||||
weight_name = para->name();
|
||||
}
|
||||
// find the fakequant from input
|
||||
int64_t count = 0;
|
||||
const int64_t max_depth = 5;
|
||||
auto is_quant_cnode = [](const AnfNodePtr &node) {
|
||||
return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
|
||||
};
|
||||
while (!is_quant_cnode(x)) {
|
||||
if (count >= max_depth) {
|
||||
break;
|
||||
}
|
||||
auto cnode = x->cast_ptr<CNode>();
|
||||
if (cnode == nullptr || cnode->size() <= 1) {
|
||||
break;
|
||||
}
|
||||
x = cnode->input(1);
|
||||
count += 1;
|
||||
}
|
||||
if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
|
||||
(*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
|
||||
}
|
||||
// get the fakequant parameter minq's name
|
||||
if (!is_quant_cnode(x)) {
|
||||
return;
|
||||
}
|
||||
auto cnode = x->cast_ptr<CNode>();
|
||||
constexpr size_t expect_input_size = 4;
|
||||
if (cnode == nullptr || cnode->IsApply(prim::kPrimLoad) || cnode->size() != expect_input_size) {
|
||||
return;
|
||||
}
|
||||
const size_t fakequant_index = 2;
|
||||
auto fakequant_min_node = cnode->input(fakequant_index);
|
||||
if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
|
||||
return;
|
||||
}
|
||||
std::string fakequant_min_node_name;
|
||||
if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
|
||||
fakequant_min_node_name = fakequant_min_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
|
||||
} else {
|
||||
auto param = fakequant_min_node->cast_ptr<Parameter>();
|
||||
MS_EXCEPTION_IF_NULL(param);
|
||||
fakequant_min_node_name = param->name();
|
||||
}
|
||||
auto quant_op = GetValuePtr<PrimitivePy>(cnode->input(0));
|
||||
if (quant_op == nullptr) {
|
||||
return;
|
||||
}
|
||||
(*fake_quant_table)[weight_name] = std::make_pair(quant_op->adapter(), fakequant_min_node_name);
|
||||
}
|
||||
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> GraphExecutorPy::FetchInfoForQuantExport(
|
||||
const std::string &phase) {
|
||||
FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase << ")!";
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> fake_quant_table;
|
||||
auto filter = [](const AnfNodePtr &node) {
|
||||
return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative));
|
||||
};
|
||||
std::vector<AnfNodePtr> nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter);
|
||||
auto is_quant_cnode = [](const AnfNodePtr &node) {
|
||||
return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
|
||||
};
|
||||
const size_t root_node_size = 3;
|
||||
const size_t weight_index = 2;
|
||||
for (const auto &node : nodes) {
|
||||
auto root_node = node->cast<CNodePtr>();
|
||||
if (root_node == nullptr || root_node->size() != root_node_size) {
|
||||
continue;
|
||||
}
|
||||
auto weight = root_node->input(weight_index);
|
||||
if (!is_quant_cnode(weight)) {
|
||||
auto tuple_node = weight->cast_ptr<CNode>();
|
||||
if (tuple_node != nullptr) {
|
||||
auto fake_node = tuple_node->input(1);
|
||||
if (!is_quant_cnode(fake_node)) {
|
||||
continue;
|
||||
} else {
|
||||
weight = fake_node;
|
||||
}
|
||||
}
|
||||
}
|
||||
// get parameter weight's name
|
||||
auto cnode = weight->cast_ptr<CNode>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto weight_node = cnode->input(weight_index);
|
||||
MS_EXCEPTION_IF_NULL(weight_node);
|
||||
if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
|
||||
continue;
|
||||
}
|
||||
GetWeightInfo(root_node, weight_node, &fake_quant_table);
|
||||
}
|
||||
return fake_quant_table;
|
||||
}
|
||||
|
||||
void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) {
|
||||
// save the graph to GraphExecutorPy
|
||||
FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
|
||||
|
|
|
@ -130,9 +130,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
|
|||
void TerminateDebugger();
|
||||
#endif
|
||||
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> FetchInfoForQuantExport(
|
||||
const std::string &phase);
|
||||
|
||||
// Generate a key for mapping function graph
|
||||
py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false);
|
||||
|
||||
|
@ -140,8 +137,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
|
|||
|
||||
private:
|
||||
GraphExecutorPy() = default;
|
||||
void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const;
|
||||
void ParallelPostProcess(const string &phase);
|
||||
void GetGeBackendPolicy() const;
|
||||
// filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
|
||||
|
|
|
@ -161,7 +161,6 @@
|
|||
#include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h"
|
||||
|
@ -667,8 +666,6 @@ void AscendUnifyMindIR(const std::shared_ptr<session::KernelGraph> &kernel_graph
|
|||
unify_mindir_pm->AddPass(std::make_shared<opt::MomentumUnifyOutput>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::RMSPropUnifyOutput>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::CenteredRMSPropUnifyOutput>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerLayerGradUnifyMindIR>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerChannelGradUnifyMindIR>());
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
|
||||
|
|
|
@ -1,233 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
#include "runtime/device/kernel_info.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "utils/trace_base.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerGradD(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
|
||||
const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
|
||||
if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perlayer_grad_d_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDOpName)),
|
||||
lsq_perlayer_grad_inputs[kIndex1], lsq_perlayer_grad_inputs[kIndex2], lsq_perlayer_grad_inputs[kIndex3],
|
||||
lsq_perlayer_grad_inputs[kIndex4]};
|
||||
auto lsq_perlayer_grad_d = NewCNode(lsq_perlayer_grad_d_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d);
|
||||
lsq_perlayer_grad_d->set_scope(lsq_perlayer_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_grad_d.get());
|
||||
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perlayer_grad_node, lsq_perlayer_grad_d);
|
||||
CreateMultipleOutputsOfAnfNode(graph, lsq_perlayer_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
|
||||
lsq_perlayer_grad_d_outputs);
|
||||
}
|
||||
|
||||
void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerReduceGrad(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs);
|
||||
const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
|
||||
if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
|
||||
}
|
||||
if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_d_outputs has wrong inputs size, should be "
|
||||
<< kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perlayer_reduce_grad_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDReduceOpName)),
|
||||
lsq_perlayer_grad_d_outputs[kIndex1]};
|
||||
auto lsq_perlayer_reduce_grad = NewCNode(lsq_perlayer_reduce_grad_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad);
|
||||
lsq_perlayer_reduce_grad->set_scope(lsq_perlayer_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 1UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 1UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_reduce_grad.get());
|
||||
|
||||
(*lsq_perlayer_reduce_grad_outputs).push_back(lsq_perlayer_reduce_grad);
|
||||
}
|
||||
|
||||
void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelGradD(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
|
||||
const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
|
||||
if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perchannel_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perchannel_grad_d_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDOpName)),
|
||||
lsq_perchannel_grad_inputs[kIndex1], lsq_perchannel_grad_inputs[kIndex2], lsq_perchannel_grad_inputs[kIndex3],
|
||||
lsq_perchannel_grad_inputs[kIndex4]};
|
||||
auto lsq_perchannel_grad_d = NewCNode(lsq_perchannel_grad_d_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d);
|
||||
lsq_perchannel_grad_d->set_scope(lsq_perchannel_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_grad_d.get());
|
||||
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
|
||||
CreateMultipleOutputsOfAnfNode(graph, lsq_perchannel_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
|
||||
lsq_perchannel_grad_d_outputs);
|
||||
}
|
||||
|
||||
void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelReduceGrad(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs);
|
||||
const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
|
||||
if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perchannel_grad_node);
|
||||
}
|
||||
if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_d_outputs has wrong inputs size, should be "
|
||||
<< kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perchannel_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perchannel_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perchannel_reduce_grad_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDReduceOpName)),
|
||||
lsq_perchannel_grad_d_outputs[kIndex1]};
|
||||
auto lsq_perchannel_reduce_grad = NewCNode(lsq_perchannel_reduce_grad_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad);
|
||||
lsq_perchannel_reduce_grad->set_scope(lsq_perchannel_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 1UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 1UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_reduce_grad.get());
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_reduce_grad);
|
||||
(*lsq_perchannel_reduce_grad_outputs).push_back(lsq_perchannel_reduce_grad);
|
||||
}
|
||||
|
||||
// Pattern: the FakeLearnedScaleQuantPerLayerGrad primitive followed by a sequence variable
// standing for its inputs.
const BaseRef FakeLearnedScaleQuantPerLayerGradUnifyMindIR::DefinePattern() const {
  VarPtr grad_inputs = std::make_shared<SeqVar>();
  auto grad_prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradOpName);
  VectorRef pattern({grad_prim, grad_inputs});
  return pattern;
}
|
||||
|
||||
// Rewrites a matched per-layer LSQ grad node into
//   MakeTuple(GradD output 0, GradDReduce output)
// by building the GradD node, then the reduce node on top of it.
// Raises when the matched node is not a CNode with a primitive, or when either helper
// produces an unexpected number of outputs.
const AnfNodePtr FakeLearnedScaleQuantPerLayerGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
                                                                       const AnfNodePtr &node,
                                                                       const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(func_graph);
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  // Only used to verify that the matched node carries a primitive.
  auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
  MS_EXCEPTION_IF_NULL(primitive);

  // Stage 1: the D node and its split outputs.
  std::vector<AnfNodePtr> grad_d_outputs;
  CreateOutputsOfLSQPerLayerGradD(func_graph, cnode, &grad_d_outputs);
  const size_t grad_d_output_num = grad_d_outputs.size();
  if (grad_d_output_num != kFakeLearnedScaleQuantGradOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_grad_d_outputs has wrong inputs size, should be "
                      << kFakeLearnedScaleQuantGradOutputNum << ", but got " << grad_d_output_num
                      << trace::DumpSourceLines(node);
  }

  // Stage 2: the reduce node fed by the D node.
  std::vector<AnfNodePtr> reduce_grad_outputs;
  CreateOutputsOfLSQPerLayerReduceGrad(func_graph, cnode, grad_d_outputs, &reduce_grad_outputs);
  const size_t reduce_output_num = reduce_grad_outputs.size();
  if (reduce_output_num != kSingleOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_reduce_grad_outputs has wrong inputs size, should be "
                      << kSingleOutputNum << ", but got " << reduce_output_num << trace::DumpSourceLines(node);
  }

  // Bundle the two results in the order the original grad node exposed them.
  std::vector<AnfNodePtr> tuple_inputs;
  tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple));
  tuple_inputs.push_back(grad_d_outputs.front());
  tuple_inputs.push_back(reduce_grad_outputs.front());
  return func_graph->NewCNode(tuple_inputs);
}
|
||||
|
||||
// Pattern: the FakeLearnedScaleQuantPerChannelGrad primitive followed by a sequence variable
// standing for its inputs.
const BaseRef FakeLearnedScaleQuantPerChannelGradUnifyMindIR::DefinePattern() const {
  VarPtr grad_inputs = std::make_shared<SeqVar>();
  auto grad_prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradOpName);
  VectorRef pattern({grad_prim, grad_inputs});
  return pattern;
}
|
||||
|
||||
// Rewrites a matched per-channel LSQ grad node into
//   MakeTuple(GradD output 0, GradDReduce output)
// by building the GradD node, then the reduce node on top of it.
// Raises when the matched node is not a CNode with a primitive, or when either helper
// produces an unexpected number of outputs.
const AnfNodePtr FakeLearnedScaleQuantPerChannelGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
                                                                         const AnfNodePtr &node,
                                                                         const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(func_graph);
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  // Only used to verify that the matched node carries a primitive.
  auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
  MS_EXCEPTION_IF_NULL(primitive);

  // Stage 1: the D node and its split outputs.
  std::vector<AnfNodePtr> grad_d_outputs;
  CreateOutputsOfLSQPerChannelGradD(func_graph, cnode, &grad_d_outputs);
  const size_t grad_d_output_num = grad_d_outputs.size();
  if (grad_d_output_num != kFakeLearnedScaleQuantGradOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_grad_d_outputs has wrong inputs size, should be "
                      << kFakeLearnedScaleQuantGradOutputNum << ", but got " << grad_d_output_num
                      << trace::DumpSourceLines(node);
  }

  // Stage 2: the reduce node fed by the D node.
  std::vector<AnfNodePtr> reduce_grad_outputs;
  CreateOutputsOfLSQPerChannelReduceGrad(func_graph, cnode, grad_d_outputs, &reduce_grad_outputs);
  const size_t reduce_output_num = reduce_grad_outputs.size();
  if (reduce_output_num != kSingleOutputNum) {
    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_reduce_grad_outputs has wrong inputs size, should be "
                      << kSingleOutputNum << ", but got " << reduce_output_num << trace::DumpSourceLines(node);
  }

  // Bundle the two results in the order the original grad node exposed them.
  std::vector<AnfNodePtr> tuple_inputs;
  tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple));
  tuple_inputs.push_back(grad_d_outputs.front());
  tuple_inputs.push_back(reduce_grad_outputs.front());
  return func_graph->NewCNode(tuple_inputs);
}
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
|
@ -1,72 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
|
||||
|
||||
#include <vector>
|
||||
#include "backend/common/optimizer/optimizer.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
constexpr size_t kFakeLearnedScaleQuantGradOutputNum = 2;
|
||||
constexpr size_t kFakeLearnedScaleQuantGradInputNum = 5;
|
||||
constexpr size_t kFakeLearnedScaleQuantGradDOutputNum = 2;
|
||||
constexpr auto kFakeLearnedScaleQuantPerLayerGradOpName = "FakeLearnedScaleQuantPerLayerGrad";
|
||||
constexpr auto kFakeLearnedScaleQuantPerLayerGradDOpName = "FakeLearnedScaleQuantPerLayerGradD";
|
||||
constexpr auto kFakeLearnedScaleQuantPerLayerGradDReduceOpName = "FakeLearnedScaleQuantPerLayerGradDReduce";
|
||||
constexpr auto kFakeLearnedScaleQuantPerChannelGradOpName = "FakeLearnedScaleQuantPerChannelGrad";
|
||||
constexpr auto kFakeLearnedScaleQuantPerChannelGradDOpName = "FakeLearnedScaleQuantPerChannelGradD";
|
||||
constexpr auto kFakeLearnedScaleQuantPerChannelGradDReduceOpName = "FakeLearnedScaleQuantPerChannelGradDReduce";
|
||||
|
||||
constexpr auto kAttrNeg_trunc = "neg_trunc";
|
||||
constexpr auto kAttrChannelAxis = "channel_axis";
|
||||
|
||||
class FakeLearnedScaleQuantPerLayerGradUnifyMindIR : public PatternProcessPass {
|
||||
public:
|
||||
explicit FakeLearnedScaleQuantPerLayerGradUnifyMindIR(bool multigraph = true)
|
||||
: PatternProcessPass("fake_learned_scale_quant_perlayer_grad_unify_mindir", multigraph) {}
|
||||
~FakeLearnedScaleQuantPerLayerGradUnifyMindIR() override = default;
|
||||
const BaseRef DefinePattern() const override;
|
||||
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
|
||||
|
||||
private:
|
||||
void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const;
|
||||
void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const;
|
||||
};
|
||||
|
||||
class FakeLearnedScaleQuantPerChannelGradUnifyMindIR : public PatternProcessPass {
|
||||
public:
|
||||
explicit FakeLearnedScaleQuantPerChannelGradUnifyMindIR(bool multigraph = true)
|
||||
: PatternProcessPass("fake_learned_scale_quant_perchannel_grad_unify_mindir", multigraph) {}
|
||||
~FakeLearnedScaleQuantPerChannelGradUnifyMindIR() override = default;
|
||||
const BaseRef DefinePattern() const override;
|
||||
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
|
||||
|
||||
private:
|
||||
void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const;
|
||||
void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const;
|
||||
};
|
||||
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
|
|
@ -220,10 +220,6 @@ constexpr const char kNameXlogy[] = "Xlogy";
|
|||
constexpr const char kNameReLUV2[] = "ReLUV2";
|
||||
constexpr const char kNameAccumulateNV2[] = "AccumulateNV2";
|
||||
constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVars[] = "FakeQuantWithMinMaxVars";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVarsGradient[] = "FakeQuantWithMinMaxVarsGradient";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVarsPerChannel[] = "FakeQuantWithMinMaxVarsPerChannel";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVarsPerChannelGradient[] = "FakeQuantWithMinMaxVarsPerChannelGradient";
|
||||
constexpr const char kNameActsULQ[] = "ActsULQ";
|
||||
constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad";
|
||||
constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad";
|
||||
|
|
|
@ -56,41 +56,6 @@ ATTR_MAP(ConfusionMulGrad) = {{"axes", ATTR_DESC(axes, AnyTraits<std::vector<int
|
|||
OUTPUT_MAP(ConfusionMulGrad) = {{0, OUTPUT_DESC(output0)}, {1, OUTPUT_DESC(output1)}};
|
||||
REG_ADPT_DESC(ConfusionMulGrad, kNameConfusionMulGrad, ADPT_DESC(ConfusionMulGrad))
|
||||
|
||||
// FakeQuantWithMinMaxVars
|
||||
INPUT_MAP(FakeQuantWithMinMaxVars) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVars) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVars) = {{0, OUTPUT_DESC(y)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVars, kNameFakeQuantWithMinMaxVars, ADPT_DESC(FakeQuantWithMinMaxVars))
|
||||
|
||||
// FakeQuantWithMinMaxVarsGradient
|
||||
INPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
|
||||
{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVarsGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
|
||||
{0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVarsGradient, kNameFakeQuantWithMinMaxVarsGradient,
|
||||
ADPT_DESC(FakeQuantWithMinMaxVarsGradient))
|
||||
|
||||
// FakeQuantWithMinMaxVarsPerChannel
|
||||
INPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{0, OUTPUT_DESC(y)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel, kNameFakeQuantWithMinMaxVarsPerChannel,
|
||||
ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel))
|
||||
|
||||
// FakeQuantWithMinMaxVarsPerChannelGradient
// Adapter tables for this operator: inputs 1..4 are described as gradients, x, min and max; the attributes
// are num_bits (int64_t) and narrow_range (bool); outputs 0..2 are backprops_wrt_x, backprops_wrt_min and
// backprops_wrt_max. Registered under kNameFakeQuantWithMinMaxVarsPerChannelGradient.
|
||||
INPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
|
||||
{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
|
||||
{0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient, kNameFakeQuantWithMinMaxVarsPerChannelGradient,
|
||||
ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient))
|
||||
|
||||
// GreaterEqual
|
||||
INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
|
||||
ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP;
|
||||
|
|
|
@ -32,18 +32,6 @@ DECLARE_OP_USE_OUTPUT(AccumulateNV2)
|
|||
// Adapter declarations: one DECLARE_OP_ADAPTER / DECLARE_OP_USE_OUTPUT pair per operator.
// NOTE(review): presumably each pair matches the INPUT_MAP / ATTR_MAP / OUTPUT_MAP definitions of the same
// operator in the corresponding source file; the macros themselves are defined elsewhere - confirm.
DECLARE_OP_ADAPTER(ConfusionMulGrad)
|
||||
DECLARE_OP_USE_OUTPUT(ConfusionMulGrad)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVars)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVars)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsGradient)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsGradient)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannel)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannel)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannelGradient)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannelGradient)
|
||||
|
||||
DECLARE_OP_ADAPTER(GreaterEqual)
|
||||
DECLARE_OP_USE_OUTPUT(GreaterEqual)
|
||||
|
||||
|
|
|
@ -1509,12 +1509,6 @@ class _CellGraphExecutor:
|
|||
"""
|
||||
self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key)
|
||||
|
||||
def fetch_info_for_quant_export(self, exec_id):
    """
    Fetch the quant-export information of a compiled graph from the graph executor.

    Args:
        exec_id: Identifier handed to the underlying graph executor.

    Returns:
        None when the executor reports (with the literal value False) that `exec_id` has not been
        compiled; otherwise whatever the executor's `fetch_info_for_quant_export` returns.
    """
    executor = self._graph_executor
    compiled = executor.has_compiled(exec_id)
    # Identity comparison on purpose: only the literal False short-circuits, as in the original.
    if compiled is False:
        return None
    return executor.fetch_info_for_quant_export(exec_id)
|
||||
|
||||
|
||||
def ms_memory_recycle():
|
||||
"""
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
approvers:
|
||||
- zhang_xue_tong
|
||||
- jpc_chenjianping
|
||||
- hangangqiang
|
|
@ -1,19 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
MindSpore compression module.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
|
@ -1,24 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Common module for various compression algorithms, now only including datatype definition for quantization.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from mindspore.compression.common.constant import QuantDtype
|
||||
|
||||
__all__ = ["QuantDtype"]
|
|
@ -1,124 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Note:
|
||||
Constant module for compression. This is an interface that is subject to change or deletion.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import enum
|
||||
import re
|
||||
from types import DynamicClassAttribute
|
||||
|
||||
|
||||
__all__ = ["QuantDtype"]
|
||||
|
||||
|
||||
@enum.unique
class QuantDtype(enum.Enum):
    """
    Enumeration of quantization data types: signed `INT2` ~ `INT8` and unsigned `UINT2` ~ `UINT8`.

    The value of every member is its own name; the bit width is the number embedded in that name.
    """
    INT2 = "INT2"
    INT3 = "INT3"
    INT4 = "INT4"
    INT5 = "INT5"
    INT6 = "INT6"
    INT7 = "INT7"
    INT8 = "INT8"

    UINT2 = "UINT2"
    UINT3 = "UINT3"
    UINT4 = "UINT4"
    UINT5 = "UINT5"
    UINT6 = "UINT6"
    UINT7 = "UINT7"
    UINT8 = "UINT8"

    def __str__(self):
        return str(self.name)

    @staticmethod
    def is_signed(dtype):
        """
        Tell whether a quant datatype is one of the signed members.

        Args:
            dtype (QuantDtype): quant datatype.

        Returns:
            bool, True when `dtype` is one of `INT2` ~ `INT8`, False otherwise.

        Examples:
            >>> quant_dtype = QuantDtype.INT8
            >>> is_signed = QuantDtype.is_signed(quant_dtype)
        """
        # Members are iterated in definition order, so this is [INT2, ..., INT8].
        signed_members = [member for member in QuantDtype if member.name.startswith("INT")]
        return dtype in signed_members

    @staticmethod
    def switch_signed(dtype):
        """
        Return the member with the same bit width but the opposite signedness.

        Args:
            dtype (QuantDtype): quant datatype.

        Returns:
            QuantDtype, quant datatype with opposite signed state as the input; None when `dtype`
            is not a member of this enum.

        Examples:
            >>> quant_dtype = QuantDtype.INT8
            >>> quant_dtype = QuantDtype.switch_signed(quant_dtype)
        """
        opposite = {}
        for member in QuantDtype:
            if not member.name.startswith("INT"):
                continue
            unsigned_member = QuantDtype["U" + member.name]
            opposite[member] = unsigned_member
            opposite[unsigned_member] = member
        return opposite.get(dtype)

    @DynamicClassAttribute
    def _value(self):
        """The number embedded in the member's value, as an int."""
        digits = re.search(r"(\d+)", self._value_)
        return int(digits.group(1))

    @DynamicClassAttribute
    def num_bits(self):
        """
        Get the num bits of the QuantDtype member.

        Returns:
            int, the num bits of the QuantDtype member.

        Examples:
            >>> from mindspore.compression.common import QuantDtype
            >>> quant_dtype = QuantDtype.INT8
            >>> num_bits = quant_dtype.num_bits
            >>> print(num_bits)
            8
        """
        return self._value
|
|
@ -1,19 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Compression export module.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
|
@ -1,515 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Note:
|
||||
Export for quantization. This is an interface that is subject to change or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mindspore import log as logger
|
||||
from mindspore import nn, ops
|
||||
from mindspore._checkparam import Validator
|
||||
from mindspore.common import Tensor
|
||||
from mindspore.common import dtype as mstype
|
||||
from mindspore.common.api import _cell_graph_executor as _executor
|
||||
from mindspore.common.parameter import Parameter
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.nn.layer import quant
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops import functional as F
|
||||
from mindspore.ops.operations import _inner_ops as inner
|
||||
from mindspore.compression.quant import quant_utils
|
||||
from mindspore.compression.quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell
|
||||
|
||||
__all__ = ["ExportToQuantInferNetwork"]
|
||||
|
||||
|
||||
class QuantBlock(Cell):
    r"""
    Deploy-time block that wraps a Conv/Dense core operator with `Quant` and `Dequant` steps.

    The input is quantized, passed through the core operator (plus an optional bias add), dequantized,
    cast to float32 and finally handed to the optional activation.

    Notes:
        This block is only for deploy, and not trainable.

    Args:
        core_op: Operator invoked as `core_op(x, weight)`.
        weight: Weight handed to `core_op`.
        quant_op: Operator applied to the input before `core_op`.
        dequant_op: Operator invoked as `dequant_op(x, dequant_scale)` after `core_op`.
        dequant_scale: Scale argument forwarded to `dequant_op`.
        bias: Bias added through `BiasAdd` after `core_op`; nothing is added when None. Default: None.
        activation: Callable applied to the float32 result; skipped when None. Default: None.

    Inputs:
        - **x** - Input handed to `quant_op`.

    Outputs:
        The result of the pipeline described above.
    """

    def __init__(self,
                 core_op,
                 weight,
                 quant_op,
                 dequant_op,
                 dequant_scale,
                 bias=None,
                 activation=None):
        super().__init__()
        self.core_op = core_op
        self.weight = weight
        self.quant = quant_op
        self.dequant = dequant_op
        self.dequant_scale = dequant_scale
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()
        self.sub = P.Sub()
        # Single-element int8 zero parameter; only subtracted from the weight on the bias path.
        self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')

    def construct(self, x):
        """Quantize `x`, run the core operator, dequantize, cast to float32 and apply the activation."""
        x = self.quant(x)
        if not self.has_bias:
            x = self.core_op(x, self.weight)
        else:
            shifted_weight = self.sub(self.weight, self.weight_offset)
            x = self.core_op(x, shifted_weight)
            x = self.bias_add(x, self.bias)
        x = self.dequant(x, self.dequant_scale)
        x = F.cast(x, mstype.float32)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        """Describe the quant op, core op type, weight/bias shapes, activation and dequant op."""
        pieces = [f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]']
        if self.has_bias:
            pieces.append(f', bias=shape[{self.bias.shape}]')
        if self.has_act:
            pieces.append(f', activation={self.activation}')
        pieces.append(f', dequant={self.dequant}')
        return ''.join(pieces)
|
||||
|
||||
|
||||
class QuantMindirBlock(Cell):
    """
    Block of a Conv/Dense core operator, optional bias and optional activation for MINDIR export.

    The quantization information found in `param_dict` is attached to `core_op` as primitive
    attributes when the block is constructed.

    Args:
        core_op (Cell): The operation cell.
        weight (Tensor): The weight of the cell.
        bias (Tensor): The bias of the cell. Default: None.
        activation: Callable applied to the output; its class name is recorded on `core_op` as
            "activation_name". Default: None.
        param_dict (dict): The information of the cell. The keys read here are "filter_maxq",
            "filter_minq", "output_maxq", "output_minq", "symmetric", "weight_num_bits",
            "weight_narrow_range", "input_narrow_range", "output_narrow_range", "input_maxq",
            "input_minq", "mean" and "std_dev".
    """

    def __init__(self,
                 core_op,
                 weight,
                 bias=None,
                 activation=None,
                 param_dict=None):

        super().__init__()
        self.core_op = core_op
        set_attr = self.core_op.add_prim_attr

        if activation is not None:
            set_attr("activation_name", activation.__class__.__name__)
        for key in ("filter_maxq", "filter_minq"):
            set_attr(key, Tensor(param_dict[key]))
        if param_dict["output_maxq"] is not None:
            for key in ("output_maxq", "output_minq"):
                set_attr(key, Tensor(param_dict[key]))
        set_attr("symmetric", Tensor(param_dict["symmetric"]))
        if hasattr(core_op, 'pad_mode'):
            set_attr("pad_mode", core_op.pad_mode)
        set_attr("act_num_bits", Tensor(8))
        for key in ("weight_num_bits", "weight_narrow_range"):
            set_attr(key, Tensor(param_dict[key]))
        for key in ("input_narrow_range", "output_narrow_range"):
            if param_dict[key] is not None:
                set_attr(key, Tensor(param_dict[key]))

        input_maxq = param_dict["input_maxq"]
        if input_maxq == 'None':
            # The string 'None' (not the None object) selects the mean/std_dev attributes instead.
            for key in ("mean", "std_dev"):
                set_attr(key, Tensor(param_dict[key]))
        elif input_maxq is not None:
            for key in ("input_maxq", "input_minq"):
                set_attr(key, Tensor(param_dict[key]))

        self.weight = weight
        self.bias = bias
        self.has_bias = bias is not None
        self.activation = activation
        self.has_act = activation is not None
        self.bias_add = P.BiasAdd()

    def construct(self, x):
        """Run the core operator, then the optional bias add and the optional activation."""
        x = self.core_op(x, self.weight)
        if self.has_bias:
            x = self.bias_add(x, self.bias)
        if self.has_act:
            x = self.activation(x)
        return x

    def extend_repr(self):
        """Describe the core op type, the weight/bias shapes and the activation."""
        pieces = [f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]']
        if self.has_bias:
            pieces.append(f', bias=shape[{self.bias.shape}]')
        if self.has_act:
            pieces.append(f', activation={self.activation}')
        return ''.join(pieces)
|
||||
|
||||
|
||||
class ExportToQuantInferNetwork:
    """
    Convert quantization aware network to infer network.

    Args:
        network (Cell): MindSpore quantization aware training network.
        mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of network.
        std_dev (int, float): The variance of input data after preprocessing, used for quantizing the first layer
            of network.
        inputs (Tensor): Input tensors of the `quantization aware training network`.
        is_mindir (bool): Whether export MINDIR format. Default: False.

    Returns:
        Cell, Infer network.
    """

    def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
        network = Validator.check_isinstance('network', network, (nn.Cell,))
        self.data_type = mstype.int8
        # `network` is converted in place by `run`; `network_bk` keeps the untouched cells that
        # `_get_input_quant_param` looks up by name.
        self.network = copy.deepcopy(network)
        self.network_bk = copy.deepcopy(network)
        self.get_inputs_table(inputs)
        self.mean = mean
        self.std_dev = std_dev
        self.is_mindir = is_mindir
        # Most recently converted stand-alone quant subcell (set by `_convert_subcell`, reset elsewhere).
        self.upcell = None

    @staticmethod
    def _contains_zero(value):
        """Return True when `value` (a Python scalar or an array-like) holds at least one zero."""
        return bool(np.any(np.asarray(value) == 0))

    @staticmethod
    def __get_dequant_scale(scale_a_in, scale_w):
        """Get dequant scale"""
        scale_deq = scale_a_in * scale_w

        # fuse parameter
        # |--------|47:40|--------|39:32|--------|31:0|
        #         offset_w [8]    shift_N [8]    deq_scale [32]
        # Only the deq_scale field is filled: the float32 bit pattern of every scale is stored in the
        # low 32 bits of a uint64 entry.
        float32_deq_scale = scale_deq.astype(np.float32)
        uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
        dequant_param = uint32_deq_scale.astype(np.uint64)
        return Tensor(dequant_param, mstype.uint64)

    def get_inputs_table(self, inputs):
        """Get the input quantization parameters of quantization cell for quant export."""
        phase_name = 'export_quant'
        graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
        self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)

    def run(self):
        """Start to convert."""
        logger.warning("The compression module is deprecated and may not be supported in later version, please use "
                       "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
        self.network.update_cell_prefix()
        network = self.network
        if isinstance(network, _AddFakeQuantInput):
            network = network.network
        return self._convert_quant2deploy(network)

    def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
        """Convert network's quant subcell to deploy subcell."""
        scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)

        # Build the `Quant` `Dequant` op.
        # Quant only support perlayer version. Need check here.
        if float(scale_a_in) == 0:
            raise ValueError("If `scale_a_in` is zero, will lead to zero error.")
        quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
        scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
        dequant_op = inner.Dequant()

        # Unwrap the activation so the deploy block holds the original (non fake-quant) cell.
        if isinstance(activation, _AddFakeQuantAfterSubCell):
            activation = activation.subcell
        elif hasattr(activation, "get_origin"):
            activation = activation.get_origin()

        # get op
        if isinstance(cell_core, quant.DenseQuant):
            op_core = P.MatMul()
        else:
            op_core = cell_core.conv

        # get the `weight` and `bias`
        weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)

        if self.is_mindir:
            return QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
        return QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)

    def _get_input_quant_param(self, minq_name, np_type, param_dict):
        """
        Get input quant parameter for quant block.

        Looks up, in the backup network, the first cell whose name ends with `minq_name` minus its last
        five characters, and reads scale / zero point / max / min from it. `param_dict` is updated in
        place with "input_maxq", "input_minq" and "input_narrow_range".

        Raises:
            ValueError: If no cell of the backup network matches.
        """
        # NOTE(review): the 5 stripped characters are presumably the ".minq" suffix - confirm.
        fake_quant_a_in_prefix = minq_name[:-5]
        fake_quant_a_in = None
        for cell_name, cell in self.network_bk.cells_and_names():
            if cell_name.endswith(fake_quant_a_in_prefix):
                fake_quant_a_in = cell
                break
        if fake_quant_a_in is None:
            # Previously this fell through to an UnboundLocalError with no hint about the cause.
            raise ValueError(f"Can not find the input fake quant cell whose name ends with "
                             f"'{fake_quant_a_in_prefix}' in the network.")
        scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
        param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
        return scale_a_in, zp_a_in

    def __get_quant_param(self, cell_core, fake_quant_a_out):
        """Get parameter for quant block."""
        fake_quant_weight = cell_core.fake_quant_weight
        w_minq_name = fake_quant_weight.minq.name
        w_maxq_name = fake_quant_weight.maxq.name
        np_type = mstype.dtype_to_nptype(self.data_type)
        param_dict = {
            "filter_maxq": None,
            "filter_minq": None,
            "output_maxq": None,
            "output_minq": None,
            "input_maxq": None,
            "input_minq": None,
            "input_narrow_range": None,
            "output_narrow_range": None,
            "weight_narrow_range": fake_quant_weight.narrow_range,
            "mean": self.mean,
            "std_dev": self.std_dev,
            "symmetric": fake_quant_weight.symmetric,
            "weight_num_bits": fake_quant_weight.num_bits,
        }

        scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
            quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_weight, np_type)
        if fake_quant_a_out is not None:
            _, _, param_dict["output_maxq"], param_dict["output_minq"] = \
                quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
            param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range

        info = self.quant_info_table.get(w_minq_name, None)
        if not info:
            info = self.quant_info_table.get(w_maxq_name, None)
        if not info:
            # skip quant layer
            return 1.0, 0.0, scale_w, zp_w, param_dict

        _, minq_name = info
        if minq_name == 'input':
            # The layer is fed by the network input: derive its quant parameters from mean/std_dev and
            # mark the input range with the string 'None' (checked by QuantMindirBlock).
            scale_a_in, zp_a_in = (1 / self.std_dev), round(self.mean)
            param_dict["input_maxq"], param_dict["input_minq"] = 'None', 'None'
        else:
            scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
        return scale_a_in, zp_a_in, scale_w, zp_w, param_dict

    def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
        """
        Get weight and bias for quantization.

        Returns:
            tuple of (quantized weight Tensor, int32 bias Tensor or None,
            float weight Tensor, float32 bias Tensor or None).

        Raises:
            ValueError: If a bias exists and `scale_a_in` or `scale_w` contains a zero.
        """
        np_type = mstype.dtype_to_nptype(self.data_type)
        weight = cell_core.weight.data.asnumpy()
        bias = None
        if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
            if cell_core.has_bias:
                bias = cell_core.bias.data.asnumpy()
        elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
            weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
        elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
            weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
        # Keep the float versions for the MINDIR block before quantizing.
        weight_b = weight
        bias_b = bias
        # apply the quant
        quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
                                                             cell_core.fake_quant_weight.num_bits,
                                                             cell_core.fake_quant_weight.narrow_range)
        weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
        if bias is not None:
            # `scale_a_in` may be a plain Python float (input layer / skipped layer), for which the
            # former `0 in scale_a_in` membership test raised TypeError.
            if self._contains_zero(scale_a_in):
                raise ValueError("Zero exist in `scale_a_in` which will lead to divide zero error.")
            if self._contains_zero(scale_w):
                raise ValueError("Zero exist in `scale_w` which will lead to divide zero error.")
            bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)

        if isinstance(cell_core, quant.DenseQuant):
            weight = np.transpose(weight)
            weight_b = np.transpose(weight_b)

        weight_tensor = Tensor(weight, self.data_type)
        weight_b_tensor = Tensor(weight_b)
        bias_b_tensor = Tensor(bias_b, mstype.float32) if bias_b is not None else None
        return weight_tensor, bias, weight_b_tensor, bias_b_tensor

    def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
        """Add output quant info for quant op for export mindir."""
        if not self.is_mindir:
            return
        if not isinstance(origin_op, ops.Primitive) or hasattr(origin_op, 'output_minq'):
            return
        np_type = mstype.dtype_to_nptype(self.data_type)
        _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
        origin_op.add_prim_attr('output_maxq', Tensor(maxq))
        origin_op.add_prim_attr('output_minq', Tensor(minq))

    def _convert_subcell(self, network, change, name, subcell):
        """Convert a stand-alone quant subcell to its deploy block and remember it in `self.upcell`."""
        if subcell is not None and hasattr(subcell, "fake_quant_weight"):
            new_subcell = self._get_quant_block(subcell, None, None)
            new_subcell.update_parameters_name(subcell.param_prefix + '.')
            self.upcell = new_subcell
            network.insert_child_to_cell(name, new_subcell)
            change = True
        return network, change

    def _convert_bn_act_cell(self, network, change, name, subcell, cell_core):
        """Shared conversion of a core-op + activation wrapper cell (`_convert_conv` / `_convert_dense`)."""
        activation = subcell.activation
        fake_quant_act = None
        if hasattr(activation, 'fake_quant_act_before'):
            fake_quant_act = activation.fake_quant_act_before
        elif hasattr(activation, 'fake_quant_act'):
            fake_quant_act = activation.fake_quant_act
        if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
            new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
            new_subcell.update_parameters_name(subcell.param_prefix + '.')
            network.insert_child_to_cell(name, new_subcell)
            self.upcell = None
            change = True
        return network, change

    def _convert_conv(self, network, change, name, subcell):
        """Convert subcell to quant subcell for conv."""
        return self._convert_bn_act_cell(network, change, name, subcell, subcell.conv)

    def _convert_dense(self, network, change, name, subcell):
        """Convert subcell to quant subcell for dense."""
        return self._convert_bn_act_cell(network, change, name, subcell, subcell.dense)

    def _convert_act(self, subcell):
        """Return the original activation of a quant activation cell, recording its output range."""
        activation = subcell.get_origin()
        if isinstance(activation, nn.ReLU):
            self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
        elif isinstance(activation, nn.ReLU6):
            self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
        return activation

    def _convert_add(self, subcell):
        """Unwrap the add op of a quant add cell and replace its fake quant by an identity op."""
        if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
            add_op = subcell.add.subcell
            delattr(subcell, "add")
            setattr(subcell, "add", add_op)
        add_op = subcell.add
        self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
        delattr(subcell, "fake_quant_act")
        setattr(subcell, "fake_quant_act", P.identity())

    def _convert_observer(self, network, name, subcell):
        """Replace a FakeQuantWithMinMaxObserver by an identity op, recording its range on `upcell`."""
        if self.upcell:
            self._add_output_min_max_for_op(self.upcell.core_op, subcell)
        delattr(network, name)
        setattr(network, name, P.identity())

    def _convert_fake_quant_after_cell(self, network, name, subcell):
        """Replace an _AddFakeQuantAfterSubCell by the op it wraps, recording the output range."""
        op = subcell.subcell
        self._add_output_min_max_for_op(op, subcell.fake_quant_act)
        delattr(network, name)
        setattr(network, name, op)

    def _convert_core_quant_subcell(self, network, change, name, subcell):
        """Convert subcell to quant subcell for conv and dense."""
        is_core_subcell = True
        if isinstance(subcell, nn.Conv2dBnAct):
            network, change = self._convert_conv(network, change, name, subcell)
        elif isinstance(subcell, nn.DenseBnAct):
            network, change = self._convert_dense(network, change, name, subcell)
        elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
                                  quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
            network, change = self._convert_subcell(network, change, name, subcell)
        else:
            is_core_subcell = False
        return is_core_subcell, network, change

    def _convert_other_quant_subcell(self, network, change, name, subcell):
        """Convert subcell to quant subcell for cell except conv and dense."""
        is_other_subcell = True
        if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
            activation = self._convert_act(subcell)
            network.insert_child_to_cell(name, activation)
            change = True
        elif isinstance(subcell, nn.TensorAddQuant):
            self._convert_add(subcell)
        elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
            self._convert_observer(network, name, subcell)
        elif isinstance(subcell, _AddFakeQuantAfterSubCell):
            self._convert_fake_quant_after_cell(network, name, subcell)
            change = True
        else:
            is_other_subcell = False
        return is_other_subcell, network, change

    def _convert_quant2deploy(self, network):
        """Convert network's all quant subcell to deploy subcell."""
        change = False
        for name, subcell in network.name_cells().items():
            if subcell == network:
                continue
            is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
            is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
            if not is_core_quant_subcell and not is_other_quant_subcell:
                # Not a quant cell itself: forget the pending upcell and descend into its children.
                self.upcell = None
                self._convert_quant2deploy(subcell)
        if isinstance(network, nn.SequentialCell) and change:
            network.cell_list = list(network.cells())
        return network
|
|
@ -1,28 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Quantization module, including base class of the quantizer, the quantization aware training algorithm,
|
||||
and quantization utils.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from .quantizer import OptimizeOption
|
||||
from .qat import QuantizationAwareTraining, create_quant_config
|
||||
from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers
|
||||
|
||||
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers", "QuantizationAwareTraining",
|
||||
"create_quant_config", "OptimizeOption"]
|
|
@ -1,634 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Quantization aware training
|
||||
|
||||
Users can apply quantization aware training to train a model. MindSpore supports quantization aware training,
|
||||
which models quantization errors in both the forward and backward passes using fake-quantization
|
||||
operations. Note that the entire computation is carried out in floating point. At the end of quantization
|
||||
aware training, MindSpore provides conversion functions to convert the trained model into lower precision.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
import numpy as np
|
||||
import mindspore.context as context
|
||||
from mindspore import log as logger
|
||||
from mindspore import nn, ops
|
||||
from mindspore._checkparam import Validator, Rel
|
||||
from mindspore.nn.layer import quant
|
||||
from mindspore.ops import functional as F
|
||||
from ..common import QuantDtype
|
||||
from .quantizer import Quantizer, OptimizeOption
|
||||
from .quant_utils import compute_kl_threshold
|
||||
|
||||
__all__ = ["QuantizationAwareTraining", "create_quant_config"]
|
||||
|
||||
|
||||
def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver),
                        quant_delay=(0, 0),
                        quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
                        per_channel=(False, False),
                        symmetric=(False, False),
                        narrow_range=(False, False),
                        mode="DEFAULT"):
    r"""
    Config the observer type of weights and data flow with quant parameters.

    Every argument except `mode` may be given either as a single value, which is then applied to both
    weights and data flow, or as a list/tuple whose first element applies to weights and whose last
    element applies to data flow.

    Args:
        quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element
            applies to weights and the second applies to data flow. Currently, only
            :class:`FakeQuantWithMinMaxObserver` supported.
            Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
        quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
            during train and eval. The first element represents weights and the second element represents data flow.
            Default: (0, 0).
        quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
            element represents weights and the second element represents data flow.
            Default: (QuantDtype.INT8, QuantDtype.INT8).
        per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
            then base on per channel, otherwise base on per layer. The first element represents weights
            and the second element represents data flow, and the second element must be `False` now.
            Default: (False, False).
        symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
            base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
            element represents data flow. Default: (False, False).
        narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
            The first element represents weights and the second element represents data flow.
            Default: (False, False).
        mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
            Default: "DEFAULT".

    Returns:
        QuantConfig, contains the observer type of weight and activation.

    Raises:
        ValueError: If the second element of `per_channel` is not `False`.
    """
    def _as_sequence(value):
        # The documented API accepts bare scalars; wrap them so that [0] (weights) and [-1] (data flow)
        # both resolve to the same value instead of failing on a non-subscriptable object.
        return value if isinstance(value, (list, tuple)) else (value,)

    quant_observer = _as_sequence(quant_observer)
    quant_delay = _as_sequence(quant_delay)
    quant_dtype = _as_sequence(quant_dtype)
    per_channel = _as_sequence(per_channel)
    symmetric = _as_sequence(symmetric)
    narrow_range = _as_sequence(narrow_range)

    if per_channel[-1]:
        raise ValueError("Arg 'per_channel' second element must be 'False'.")
    weight_observer = quant_observer[0].partial_init(quant_delay=quant_delay[0], quant_dtype=quant_dtype[0],
                                                     per_channel=per_channel[0], symmetric=symmetric[0],
                                                     narrow_range=narrow_range[0], mode=mode)
    act_observer = quant_observer[-1].partial_init(quant_delay=quant_delay[-1], quant_dtype=quant_dtype[-1],
                                                   per_channel=per_channel[-1], symmetric=symmetric[-1],
                                                   narrow_range=narrow_range[-1], mode=mode)
    return quant.QuantConfig(weight=weight_observer, activation=act_observer)
|
||||
|
||||
|
||||
class _AddFakeQuantInput(nn.Cell):
    """
    Wrap a network so that its single input passes through a FakeQuant op first.

    Args:
        network (Cell): Network that receives the fake-quantized input.
        quant_delay (int): Forwarded to the input observer as its `quant_delay`. Default: 0.
    """

    def __init__(self, network, quant_delay=0):
        super().__init__(auto_prefix=False)
        input_observer = quant.FakeQuantWithMinMaxObserver(min_init=-6,
                                                           max_init=6,
                                                           quant_delay=quant_delay,
                                                           ema=True)
        self.fake_quant_input = input_observer
        self.fake_quant_input.update_parameters_name('fake_quant_input.')
        self.network = network

    def construct(self, data):
        """Fake-quantize `data`, then run the wrapped network on the result."""
        quantized = self.fake_quant_input(data)
        return self.network(quantized)
|
||||
|
||||
|
||||
class _AddFakeQuantAfterSubCell(nn.Cell):
    """
    Run a sub cell (or primitive) and apply a FakeQuant op to its output.

    Args:
        subcell: Callable whose output is fake-quantized.
        kwargs: Observer settings. The keys read here are `quant_dtype`, `quant_delay`, `per_channel`,
            `symmetric`, `narrow_range` and `optimize_option`.
    """

    def __init__(self, subcell, **kwargs):
        super().__init__(auto_prefix=False)
        options = kwargs.get("optimize_option")
        use_learned_scale = options is not None and OptimizeOption.LEARNED_SCALE in kwargs["optimize_option"]

        self.subcell = subcell
        # LEARNED_SCALE starts from a wider initial range than the default mode.
        if use_learned_scale:
            self.mode, self.max_init, self.min_init = "LEARNED_SCALE", 16, -16
        else:
            self.mode, self.max_init, self.min_init = "DEFAULT", 6, -6

        observer_settings = {key: kwargs.get(key)
                             for key in ("quant_dtype", "quant_delay", "per_channel", "symmetric", "narrow_range")}
        self.fake_quant_act = quant.FakeQuantWithMinMaxObserver(min_init=self.min_init,
                                                                max_init=self.max_init,
                                                                ema=True,
                                                                mode=self.mode,
                                                                **observer_settings)

    def construct(self, *data):
        """Call the wrapped sub cell with `data` and fake-quantize what it returns."""
        return self.fake_quant_act(self.subcell(*data))
|
||||
|
||||
|
||||
class QuantizationAwareTraining(Quantizer):
    r"""
    Quantizer for quantization aware training.

    Args:
        bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True.
        freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance.
            Default: 1e7.
        quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
            during train and eval. The first element represents weights and the second element represents data flow.
            Default: (0, 0).
        quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
            element represents weights and the second element represents data flow. It is necessary to consider the
            precision support of hardware devices in the practical quantization infer scenario.
            Default: (QuantDtype.INT8, QuantDtype.INT8).
        per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
            then base on per channel, otherwise base on per layer. The first element represents weights and the
            second element represents data flow, and the second element must be `False` now. Default: (False, False).
        symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
            base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
            element represents data flow. Default: (False, False).
        narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
            The first element represents weights and the second element represents data flow.
            Default: (False, False).
        optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently
            only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured,
            `LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only work under some constraints, which
            includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators
            such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this
            scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT.
        one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Raises:
        TypeError: If the element of `quant_delay` or `freeze_bn` is not int.
        TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range`
            is not bool.
        TypeError: If the element of `quant_dtype` is not `QuantDtype`.
        ValueError: If `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is a list or
            tuple that is empty or has more than 2 elements.
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0.
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True).
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True).
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0).

    Examples:
        >>> from mindspore.compression.quant import QuantizationAwareTraining
        >>> from mindspore import nn
        >>> class LeNet5(nn.Cell):
        ...     def __init__(self, num_class=10, channel=1):
        ...         super(LeNet5, self).__init__()
        ...         self.type = "fusion"
        ...         self.num_class = num_class
        ...
        ...         # change `nn.Conv2d` to `nn.Conv2dBnAct`
        ...         self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
        ...         self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
        ...         # change `nn.Dense` to `nn.DenseBnAct`
        ...         self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
        ...         self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
        ...         self.fc3 = nn.DenseBnAct(84, self.num_class)
        ...
        ...         self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        ...         self.flatten = nn.Flatten()
        ...
        ...     def construct(self, x):
        ...         x = self.conv1(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.conv2(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.flatten(x)
        ...         x = self.fc1(x)
        ...         x = self.fc2(x)
        ...         x = self.fc3(x)
        ...         return x
        ...
        >>> net = LeNet5()
        >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
        >>> net_qat = quantizer.quantize(net)
    """
    # Names of primitives that get a FakeQuant op appended to their output.
    __quant_op_name = ["Add", "Sub", "Mul", "RealDiv", "ReduceMean"]

    def __init__(self,
                 bn_fold=True,
                 freeze_bn=10000000,
                 quant_delay=(0, 0),
                 quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
                 per_channel=(False, False),
                 symmetric=(False, False),
                 narrow_range=(False, False),
                 optimize_option=OptimizeOption.QAT,
                 one_conv_fold=True):
        """Init for QuantizationAwareTraining quantizer"""
        super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option)

        def convert2list(name, value):
            """Wrap a scalar in a list; reject sequences that cannot be read as (weights, data flow)."""
            if not isinstance(value, (list, tuple)):
                return [value]
            # One element applies to both weights and data flow, two elements are (weights, data flow).
            # An empty sequence used to surface later as an IndexError; report it here instead.
            if not 1 <= len(value) <= 2:
                raise ValueError("The length of input `{}` should be 1 or 2, but got {}.".format(name, len(value)))
            return value

        quant_delay_list = convert2list("quant delay", quant_delay)
        quant_dtype_list = convert2list("quant dtype", quant_dtype)
        per_channel_list = convert2list("per channel", per_channel)
        symmetric_list = convert2list("symmetric", symmetric)
        narrow_range_list = convert2list("narrow range", narrow_range)

        # Index 0 is the weight setting, index -1 the data-flow (activation) setting.
        self.weight_qdelay = Validator.check_non_negative_int(quant_delay_list[0], "quant delay")
        self.act_qdelay = Validator.check_int(quant_delay_list[-1], 0, Rel.GE, "quant delay")
        self.bn_fold = Validator.check_bool(bn_fold, "bn fold")
        self.freeze_bn = Validator.check_non_negative_int(freeze_bn, "freeze bn")
        self.weight_dtype = Validator.check_isinstance("weights dtype", quant_dtype_list[0], QuantDtype)
        self.act_dtype = Validator.check_isinstance("activations dtype", quant_dtype_list[-1], QuantDtype)
        self.weight_channel = Validator.check_bool(per_channel_list[0], "per channel")
        self.act_channel = Validator.check_bool(per_channel_list[-1], "per channel")
        self.weight_symmetric = Validator.check_bool(symmetric_list[0], "symmetric")
        self.act_symmetric = Validator.check_bool(symmetric_list[-1], "symmetric")
        self.weight_range = Validator.check_bool(narrow_range_list[0], "narrow range")
        self.act_range = Validator.check_bool(narrow_range_list[-1], "narrow range")
        self.one_conv_fold = Validator.check_bool(one_conv_fold, "one conv fold")
        self._convert_method_map = {nn.Conv2dBnAct: self._convert_conv,
                                    nn.DenseBnAct: self._convert_dense}
        self.mode = "DEFAULT"
        if OptimizeOption.LEARNED_SCALE in self.optimize_option:
            self.mode = "LEARNED_SCALE"
            if not self.weight_symmetric or not self.act_symmetric:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support "
                                 "symmetric=(True, True) for quant")
            if not self.weight_range or not self.act_range:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support narrow_range=(True, True) "
                                 "for quant")
            if self.freeze_bn != 0:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support freeze_bn equal to 0, "
                                 "but get freeze_bn={}".format(self.freeze_bn))
            if self.weight_qdelay != 0 or self.act_qdelay != 0:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support quant_delay=(0, 0)")
        self.quant_config = create_quant_config(quant_delay=quant_delay_list,
                                                quant_dtype=quant_dtype_list,
                                                per_channel=per_channel_list,
                                                symmetric=symmetric_list,
                                                narrow_range=narrow_range_list,
                                                mode=self.mode)
        # Added to the BN moving variance before the square root when folding BN into weights.
        self.eps = 1e-5

    @staticmethod
    def _convert_op_name(name):
        """Convert a CamelCase name to snake_case, e.g. "ReduceMean" becomes "reduce_mean"."""
        pattern = re.compile(r'([A-Z]{1})')
        name_new = re.sub(pattern, r'_\1', name).lower()
        # A leading capital produces a leading underscore; drop it.
        if name_new[0] == '_':
            name_new = name_new[1:]
        return name_new

    def quantize(self, network):
        """
        Quant API to convert input network to a quantization aware training network.

        Note:
            Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`.

        Args:
            network (Cell): network to be quantized.

        Returns:
            Cell, a quantization aware training network.

        Raises:
            KeyError: If the `device_target` set in context is not in `support_device`.
        """

        logger.warning("The compression module is deprecated and may not be supported in later version, please use "
                       "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
        support_device = ["Ascend", "GPU"]
        if context.get_context('device_target') not in support_device:
            raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))

        if OptimizeOption.QAT in self.optimize_option or OptimizeOption.LEARNED_SCALE in self.optimize_option:
            network.update_cell_prefix()
            network = self._convert_subcells2quant(network)
            network.update_cell_type("quant")
        return network

    def _convert_subcells2quant(self, network):
        """
        Recursively convert sub cells like `Conv2dBnAct` and `DenseBnAct` to quant cells, then wrap the
        white-listed primitives held as attributes of `network` with a trailing FakeQuant op.

        Args:
            network (Cell): Cell whose children are converted in place.

        Returns:
            Cell, the same `network` object after conversion.
        """
        cells = network.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            if subcell == network:
                continue
            if isinstance(subcell, (nn.Conv2dBnAct, nn.DenseBnAct)):
                prefix = subcell.param_prefix
                new_subcell = self._convert_method_map[type(subcell)](subcell)
                new_subcell.update_parameters_name(prefix + '.')
                network.insert_child_to_cell(name, new_subcell)
                change = True
            else:
                self._convert_subcells2quant(subcell)
        # A SequentialCell keeps its own list of children, which must be rebuilt after replacement.
        if isinstance(network, nn.SequentialCell) and change:
            network.cell_list = list(network.cells())

        # add FakeQuant OP after OP in white list, but not including those wrapped in the below quantization cell.
        if isinstance(network, (nn.FakeQuantWithMinMaxObserver,
                                nn.Conv2dBnFoldQuantOneConv,
                                nn.Conv2dBnFoldQuant,
                                nn.Conv2dBnWithoutFoldQuant,
                                nn.Conv2dQuant,
                                nn.DenseQuant,
                                nn.ActQuant,
                                nn.TensorAddQuant,
                                nn.MulQuant)):
            return network

        # Collect first and mutate afterwards, because the loop below edits `network.__dict__`.
        add_list = []
        for name in network.__dict__:
            if name[0] == '_':
                continue
            attr = network.__dict__[name]
            if isinstance(attr, ops.Primitive) and attr.name in self.__quant_op_name:
                add_list.append((name, attr))
        for name, prim_op in add_list:
            prefix = name
            add_quant = self._make_fake_quant_after(prim_op)
            if network.param_prefix:
                prefix = '.'.join([network.param_prefix, prefix])
            add_quant.update_parameters_name(prefix + '.')
            # Remove the plain attribute so that the wrapper registered as a child cell takes its place.
            del network.__dict__[name]
            network.insert_child_to_cell(name, add_quant)
        return network

    def _make_fake_quant_after(self, subcell):
        """Wrap `subcell` so that a FakeQuant op configured with the activation settings follows it."""
        return _AddFakeQuantAfterSubCell(subcell,
                                         quant_dtype=self.act_dtype,
                                         quant_delay=self.act_qdelay,
                                         per_channel=self.act_channel,
                                         symmetric=self.act_symmetric,
                                         narrow_range=self.act_range,
                                         optimize_option=self.optimize_option)

    def _refresh_weight_observer_by_kl(self, weight_para, batchnorm, scale_shape):
        """
        Re-create the weight observer of `self.quant_config` with a KL-derived initial range.

        Args:
            weight_para (numpy.ndarray): Weight values of the layer being converted.
            batchnorm (Cell): BatchNorm cell whose gamma / moving variance are folded into the weights
                before the range is computed, or None when the layer has no BatchNorm.
            scale_shape (tuple): Shape the per-output-channel scale factor is reshaped to so that it
                broadcasts along the first axis of `weight_para`.
        """
        if batchnorm is not None:
            scale_factor = (batchnorm.gamma.data.asnumpy() /
                            np.sqrt(batchnorm.moving_variance.data.asnumpy() + self.eps))
            weight_para = weight_para * scale_factor.reshape(scale_shape)
        min_init, max_init = self._kl_init(weight_para, self.weight_dtype)
        self.quant_config = self.quant_config._replace(
            weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))

    def _convert_following_activation(self, subcell):
        """Quantize the activation of a `Conv2dBnAct`/`DenseBnAct`, or append a FakeQuant if requested."""
        if subcell.has_act and subcell.activation is not None:
            subcell.activation = self._convert_activation(subcell.activation)
        elif subcell.after_fake:
            subcell.has_act = True
            subcell.activation = self._make_fake_quant_after(F.identity)

    def _convert_conv(self, subcell):
        """
        Convert the inner Conv2d (and BatchNorm, if any) of a `Conv2dBnAct` cell to a quant cell.

        Args:
            subcell (Conv2dBnAct): Cell to convert; it is modified in place.

        Returns:
            Conv2dBnAct, the same `subcell` object holding the quantized convolution.

        Raises:
            ValueError: If `self.eps` is zero.
        """
        if self.eps == 0:
            raise ValueError("`epsilon` is zero may lead to divide zero error")
        if OptimizeOption.LEARNED_SCALE in self.optimize_option:
            # Conv weights are indexed by output channel on axis 0, hence the (-1, 1, 1, 1) scale shape.
            self._refresh_weight_observer_by_kl(subcell.conv.weight.data.asnumpy(),
                                                subcell.batchnorm if subcell.has_bn else None,
                                                (-1, 1, 1, 1))

        conv_inner = subcell.conv
        # Constructor arguments shared by every quantized convolution variant.
        common_kwargs = dict(kernel_size=conv_inner.kernel_size,
                             stride=conv_inner.stride,
                             pad_mode=conv_inner.pad_mode,
                             padding=conv_inner.padding,
                             dilation=conv_inner.dilation,
                             group=conv_inner.group,
                             has_bias=conv_inner.has_bias,
                             quant_config=self.quant_config)
        if subcell.has_bn:
            bn_inner = subcell.batchnorm
            # Arguments shared by the three BatchNorm-aware variants.
            bn_kwargs = dict(eps=bn_inner.eps,
                             momentum=1 - bn_inner.momentum,
                             bias_init=conv_inner.bias_init)
            if self.bn_fold:
                if self.one_conv_fold:
                    conv_inner = quant.Conv2dBnFoldQuantOneConv(conv_inner.in_channels,
                                                                conv_inner.out_channels,
                                                                quant_dtype=self.weight_dtype,
                                                                fake=True,
                                                                **bn_kwargs,
                                                                **common_kwargs)
                else:
                    conv_inner = quant.Conv2dBnFoldQuant(conv_inner.in_channels,
                                                         conv_inner.out_channels,
                                                         freeze_bn=self.freeze_bn,
                                                         quant_dtype=self.weight_dtype,
                                                         fake=True,
                                                         **bn_kwargs,
                                                         **common_kwargs)
                # change original network Batch Normalization OP parameters to quant network
                conv_inner.gamma = subcell.batchnorm.gamma
                conv_inner.beta = subcell.batchnorm.beta
                conv_inner.moving_mean = subcell.batchnorm.moving_mean
                conv_inner.moving_variance = subcell.batchnorm.moving_variance
            else:
                conv_inner = quant.Conv2dBnWithoutFoldQuant(conv_inner.in_channels,
                                                            conv_inner.out_channels,
                                                            **bn_kwargs,
                                                            **common_kwargs)
                # change original network Batch Normalization OP parameters to quant network
                conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
                conv_inner.batchnorm.beta = subcell.batchnorm.beta
                conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean
                conv_inner.batchnorm.moving_variance = subcell.batchnorm.moving_variance
            # The BatchNorm now lives inside the quantized convolution.
            del subcell.batchnorm
            subcell.batchnorm = None
            subcell.has_bn = False
        else:
            conv_inner = quant.Conv2dQuant(conv_inner.in_channels,
                                           conv_inner.out_channels,
                                           quant_dtype=self.weight_dtype,
                                           **common_kwargs)
        # change original network Conv2D OP parameters to quant network
        conv_inner.weight = subcell.conv.weight
        if subcell.conv.has_bias:
            conv_inner.bias = subcell.conv.bias
        subcell.conv = conv_inner
        self._convert_following_activation(subcell)
        return subcell

    def _convert_dense(self, subcell):
        """
        Convert the inner Dense of a `DenseBnAct` cell to a quant cell.

        Args:
            subcell (DenseBnAct): Cell to convert; it is modified in place.

        Returns:
            DenseBnAct, the same `subcell` object holding the quantized dense layer.

        Raises:
            ValueError: If `self.eps` is zero.
        """
        if self.eps == 0:
            raise ValueError("`epsilon` is zero may lead to divide zero error")
        if OptimizeOption.LEARNED_SCALE in self.optimize_option:
            # Dense weights are 2-D, so the per-output-channel scale must be reshaped to (-1, 1);
            # the 4-D conv shape would broadcast the product to a 4-D array.
            # NOTE(review): assumes the weight is laid out as (out_channels, in_channels) - confirm.
            self._refresh_weight_observer_by_kl(subcell.dense.weight.data.asnumpy(),
                                                subcell.batchnorm if subcell.has_bn else None,
                                                (-1, 1))

        dense_inner = subcell.dense
        dense_inner = quant.DenseQuant(dense_inner.in_channels,
                                       dense_inner.out_channels,
                                       has_bias=dense_inner.has_bias,
                                       quant_config=self.quant_config,
                                       quant_dtype=self.weight_dtype)
        # change original network Dense OP parameters to quant network
        dense_inner.weight = subcell.dense.weight
        if subcell.dense.has_bias:
            dense_inner.bias = subcell.dense.bias
        subcell.dense = dense_inner
        self._convert_following_activation(subcell)
        return subcell

    def _convert_activation(self, activation):
        """
        Convert an activation cell to a quant cell.

        Args:
            activation (Cell): Activation instance to wrap.

        Returns:
            ActQuant, the quantized activation.

        Raises:
            ValueError: If the class of `activation` is not one of the supported activations.
        """
        act_class = activation.__class__
        act_list = [nn.ReLU, nn.ReLU6, nn.Sigmoid]
        # These activations additionally get a FakeQuant op in front of them.
        act_list_with_fake_before = [nn.LeakyReLU, nn.HSigmoid, nn.HSwish]

        if act_class in act_list:
            return quant.ActQuant(activation=activation,
                                  quant_config=self.quant_config,
                                  quant_dtype=self.act_dtype)
        if act_class in act_list_with_fake_before:
            return quant.ActQuant(activation=activation,
                                  ema=True,
                                  fake_before=True,
                                  quant_config=self.quant_config,
                                  quant_dtype=self.act_dtype)
        raise ValueError("Unsupported activation in auto quant: {}".format(act_class))

    def _kl_init(self, subcell_weight_para, weight_dtype):
        """
        Calculate the value of max_init and min_init with compute_kl_threshold.

        Args:
            subcell_weight_para (numpy.ndarray): Weight values; iterated along axis 0 when weights are
                quantized per channel.
            weight_dtype (QuantDtype): Quantization datatype passed to `compute_kl_threshold`.

        Returns:
            tuple, `(min_init, max_init)`: two lists of equal length where `min_init[i] == -max_init[i]`.
        """
        if self.weight_channel:
            max_init = [compute_kl_threshold(weight_para_each, weight_dtype)
                        for weight_para_each in subcell_weight_para]
        else:
            max_init = [compute_kl_threshold(subcell_weight_para, weight_dtype)]
        min_init = [-x for x in max_init]
        return min_init, max_init

    def _set_mixed_bits(self, network, strategy):
        r"""
        Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE`
        optimize_option.

        Args:
            network (Cell): Input network.
            strategy (list): The quantization strategy for layers that need to be quantified (eg. [[8], [8],
                ..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the
                convolution layer is supported.

        Returns:
            Cell, a network with mixed bit strategy configured.

        Raises:
            ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
            ValueError: If the number of `Conv2dBnAct`/`DenseBnAct` cells differs from `len(strategy)`.
            ValueError: If `self.eps` is zero.
            ValueError: If a bit width in `strategy` does not map to a supported `QuantDtype`.
        """
        if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
            raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
                             "optimize_option.")

        # Layers are matched to strategy entries by their traversal order in `cells_and_names`.
        quantizable_cells = [cell for _, cell in network.cells_and_names()
                             if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct))]
        if len(quantizable_cells) != len(strategy):
            raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.")

        type_map = {dtype.num_bits: dtype
                    for dtype in (QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5,
                                  QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8)}
        if self.eps == 0:
            raise ValueError("`epsilon` is zero may lead to divide zero error")
        for cell, bits in zip(quantizable_cells, strategy):
            weight_dtype = type_map.get(bits[0])
            if weight_dtype is None:
                raise ValueError("Input strategy is invalid: {}".format(bits[0]))
            cell.weight_dtype = weight_dtype
            if isinstance(cell, nn.Conv2dBnAct):
                inner, scale_shape = cell.conv, (-1, 1, 1, 1)
            else:
                # Dense weights are 2-D, so the per-output-channel scale is reshaped to (-1, 1).
                inner, scale_shape = cell.dense, (-1, 1)
            subcell_weight_para = inner.weight.data.asnumpy()
            # Only inner cells that expose folded BatchNorm statistics are rescaled.
            if hasattr(inner, 'gamma'):
                scale_factor = (inner.gamma.data.asnumpy() /
                                np.sqrt(inner.moving_variance.data.asnumpy() + self.eps))
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(scale_shape)
            min_init, max_init = self._kl_init(subcell_weight_para, weight_dtype)
            inner.fake_quant_weight.reset(quant_dtype=weight_dtype,
                                          min_init=min_init,
                                          max_init=max_init)
        return network
|
|
@ -1,462 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Quantization utils.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from mindspore._checkparam import Validator
|
||||
from mindspore import log as logger
|
||||
from ... import nn
|
||||
|
||||
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"]
|
||||
|
||||
|
||||
def cal_quantization_params(input_min,
                            input_max,
                            quant_min,
                            quant_max,
                            data_type,
                            symmetric=False):
    r"""
    Calculate the quantization parameters `scale` and `zero point`.

    The float range is first widened so that it always contains 0.0
    (`input_max` is clamped to be >= 0 and `input_min` to be <= 0).

    Args:
        input_min (numpy.ndarray): Minimum float values, one per channel or a single value.
        input_max (numpy.ndarray): Maximum float values, same shape as `input_min`.
        quant_min (int): The minimum quantization integer.
        quant_max (int): The maximum quantization integer.
        data_type (numpy type): Can be numpy int8, numpy uint8.
        symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.

    Returns:
        scale (numpy.ndarray): quantization param.
        zero point (numpy.ndarray): quantization param.

    Raises:
        ValueError: If `quant_min` equals `quant_max`, if the shapes of `input_min` and
            `input_max` differ or have more than one dimension, or if any element of the
            computed scale is zero when a zero point has to be derived from it.
    """
    if quant_min == quant_max:
        raise ValueError("quant_max is equal to quant_min which will lead to divide zero error.")

    # Widen the range so that 0.0 is always representable.
    input_max = np.maximum(0.0, input_max)
    input_min = np.minimum(0.0, input_min)

    if input_min.shape != input_max.shape:
        raise ValueError("input min shape should be equal to input max.")
    if len(input_min.shape) > 1:
        raise ValueError("input min and max shape should be one dim.")
    if (input_min > input_max).all():
        raise ValueError("input_min min should be less than input max.")
    if (input_max == input_min).all():
        # Degenerate range on every channel: fall back to scale 1 and zero point 0.
        return np.ones(input_min.shape), np.zeros(input_min.shape)

    # calculate scale
    if symmetric:
        input_max = np.maximum(-input_min, input_max)
        input_min = -input_max
    scale = (input_max - input_min) / (quant_max - quant_min)

    # calculate zero point
    if data_type == np.int8 and symmetric:
        zp = np.zeros(input_min.shape)
    else:
        # `scale` may hold one value per channel, so test element-wise; a plain
        # `scale == 0.0` in an `if` is ambiguous for multi-element arrays.
        if np.any(scale == 0.0):
            raise ValueError("scale can not be 0.")
        zp_double = quant_min - input_min / scale
        zp = np.floor(zp_double + 0.5)

    return scale, zp
|
||||
|
||||
|
||||
def get_quant_min_max(data_type, num_bits=8, narrow_range=False):
    """
    Return the integer range `(quant_min, quant_max)` for a quantized data type.

    Args:
        data_type (numpy type): numpy int8 (signed range) or numpy uint8 (unsigned range).
        num_bits (int): Number of bits used for quantization. Default: 8.
        narrow_range (bool): If True, the lower bound is raised by one. Default: False.

    Returns:
        tuple, the minimum and maximum quantization integers.

    Raises:
        ValueError: If `data_type` is neither numpy int8 nor numpy uint8.
    """
    if data_type == np.int8:
        half_span = 2 ** (num_bits - 1)
        lower, upper = 0 - half_span, half_span - 1
    elif data_type == np.uint8:
        lower, upper = 0, 2 ** num_bits - 1
    else:
        raise ValueError("Unsupported datatype({})".format(data_type))
    return (lower + 1 if narrow_range else lower), upper
|
||||
|
||||
|
||||
def weight2int(data, scale, zero_point, quant_min, quant_max):
    r"""
    Calculate int8/uint8 weight from fp32. the formula is defined as:

    .. math::
        int8/uint8 = round(float/scale) + offset

    Args:
        data (numpy.ndarray): The float weight to quantize.
        scale (numpy.ndarray): One value, or one value per channel.
        zero_point (numpy.ndarray): Same shape as `scale`.
        quant_min (int): The minimum quantization integer.
        quant_max (int): The maximum quantization integer.

    Returns:
        weight (numpy.ndarray): Rounded values clamped to `[quant_min, quant_max]`,
        with the same shape as `data`.

    Raises:
        ValueError: If `scale` and `zero_point` differ in shape, if `scale` is empty or
            contains a zero, or if a per-channel `scale` matches neither the first nor the
            second axis of `data`.
    """
    if scale.shape != zero_point.shape:
        raise ValueError("`scale` and `zero_point` should have the same shape.")
    # A shape entry is never negative, so compare against 1 to actually reject empty input.
    if scale.shape[0] < 1:
        raise ValueError("`scale` and `zero_point` shape should be greater than zero.")
    if 0 in scale:
        raise ValueError("Zero exist in `scale` which will lead to divide zero error.")

    if len(scale.shape) >= 1 and scale.shape[0] > 1:
        # Per-channel: reshape so the parameters broadcast along the channel axis of `data`.
        if scale.shape[0] == data.shape[0]:
            # Channel on axis 0 (`Conv2d` or `Dense` op weight).
            shape_list = [-1] + [1] * len(data.shape[1:])
        elif scale.shape[0] == data.shape[1]:
            # Channel on axis 1 (`DepthwiseConv2d` op weight).
            shape_list = [1, -1] + [1] * len(data.shape[2:])
        else:
            raise ValueError("Unsupported weight shape({})".format(data.shape))
        scale = scale.reshape(shape_list)
        zero_point = zero_point.reshape(shape_list)

    weight_int = np.round((data / scale) + zero_point)
    # Saturate to the representable integer range.
    weight_int[weight_int > quant_max] = quant_max
    weight_int[weight_int < quant_min] = quant_min
    return weight_int
|
||||
|
||||
|
||||
def scale_zp_max_min_from_fake_quant_cell(cell, data_type):
    """
    Derive scale, zero point, max and min from a fake-quant cell.

    Args:
        cell (Cell): Fake-quant cell exposing `minq`, `maxq`, `mode`, `num_bits`,
            `narrow_range`, `symmetric` and `neg_trunc`.
        data_type (numpy type): Target integer type, forwarded to `get_quant_min_max`
            and `cal_quantization_params`.

    Returns:
        tuple, `(scale, zero_point, maxq, minq)` where `maxq` and `minq` are numpy arrays.
    """
    lower = cell.minq.data.asnumpy()
    upper = cell.maxq.data.asnumpy()
    if cell.mode == 'LEARNED_SCALE':
        # Force maxq to be non-negative and minq to be non-positive.
        upper, lower = np.abs(upper), -np.abs(lower)

    int_low, int_high = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range)
    # Negative truncation turns symmetric quantization off.
    use_symmetric = cell.symmetric and not cell.neg_trunc
    scale, zp = cal_quantization_params(lower, upper, int_low, int_high, data_type,
                                        symmetric=use_symmetric)
    return scale, zp, upper, lower
|
||||
|
||||
|
||||
def fold_batchnorm(weight, cell_quant):
    r"""
    Fold the batchnorm statistics stored on `cell_quant` into its weight.

    The statistics (`moving_variance`, `moving_mean`, `gamma`, `beta`, `eps`) are read
    directly from `cell_quant`, as in `Conv2dBnFoldQuant`.

    Args:
        weight (numpy.ndarray): Weight of `cell_quant`.
        cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`.

    Returns:
        weight (numpy.ndarray): Folded weight, `weight * gamma / sqrt(variance + eps)`.
        bias (numpy.ndarray): Folded bias, `beta - gamma * mean / sqrt(variance + eps)`.

    Raises:
        ValueError: If `eps` is zero, or if the number of channels in `gamma` matches
            neither axis 0 nor axis 1 of `weight`.
    """
    bn_variance = cell_quant.moving_variance.data.asnumpy()
    bn_mean = cell_quant.moving_mean.data.asnumpy()
    gamma = cell_quant.gamma.data.asnumpy()
    beta = cell_quant.beta.data.asnumpy()
    eps = cell_quant.eps
    if eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")
    sigma = np.sqrt(bn_variance + eps)

    channels = gamma.shape[0]
    if channels == weight.shape[0]:
        # Channel on axis 0 (`Conv2d` or `Dense` op weight).
        broadcast_shape = [-1] + [1] * len(weight.shape[1:])
    elif channels == weight.shape[1]:
        # Channel on axis 1 (`DepthwiseConv2d` op weight).
        broadcast_shape = [1, -1] + [1] * len(weight.shape[2:])
    else:
        raise ValueError("Unsupported weight shape({})".format(weight.shape))

    folded_weight = weight * gamma.reshape(broadcast_shape) / sigma.reshape(broadcast_shape)
    folded_bias = beta - gamma * bn_mean / sigma
    return folded_weight, folded_bias
|
||||
|
||||
|
||||
def without_fold_batchnorm(weight, cell_quant):
    r"""
    Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight.

    The statistics (`moving_variance`, `moving_mean`, `gamma`, `beta`, `eps`) are read
    from the `batchnorm` sub-cell of `cell_quant`.

    Args:
        weight (numpy.ndarray): Weight of `cell_quant`.
        cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`.

    Returns:
        weight (numpy.ndarray): Folded weight, `weight * gamma / sqrt(variance + eps)`.
        bias (numpy.ndarray): Folded bias, `beta - gamma * mean / sqrt(variance + eps)`.

    Raises:
        ValueError: If `eps` is zero, or if the number of channels in `gamma` matches
            neither axis 0 nor axis 1 of `weight`.
    """
    batchnorm = cell_quant.batchnorm
    bn_variance = batchnorm.moving_variance.data.asnumpy()
    bn_mean = batchnorm.moving_mean.data.asnumpy()
    gamma = batchnorm.gamma.data.asnumpy()
    beta = batchnorm.beta.data.asnumpy()
    eps = batchnorm.eps
    if eps == 0:
        raise ValueError("`epsilon` is zero may lead to divide zero error")
    sigma = np.sqrt(bn_variance + eps)

    channels = gamma.shape[0]
    if channels == weight.shape[0]:
        # Channel on axis 0 (`Conv2d` or `Dense` op weight).
        broadcast_shape = [-1] + [1] * len(weight.shape[1:])
    elif channels == weight.shape[1]:
        # Channel on axis 1 (`DepthwiseConv2d` op weight).
        broadcast_shape = [1, -1] + [1] * len(weight.shape[2:])
    else:
        raise ValueError("Unsupported weight shape({})".format(weight.shape))

    folded_weight = weight * gamma.reshape(broadcast_shape) / sigma.reshape(broadcast_shape)
    folded_bias = beta - gamma * bn_mean / sigma
    return folded_weight, folded_bias
|
||||
|
||||
|
||||
def compute_kl_threshold(data, bitwidth):
|
||||
r"""
|
||||
Using KL-J Distance to calculate the clip threshold.
|
||||
|
||||
Args:
|
||||
- **data** (NumpyArray) - Data observed to calculate the threshold for quantization,
|
||||
- **bitwidth** (QuantDtype) - The datatype of quantization.
|
||||
Outputs:
|
||||
Tensor with Shape 1. Threshold to calculate the data.
|
||||
"""
|
||||
data_max = np.abs(data).max()
|
||||
if data_max < 1e-5:
|
||||
return 1e-5
|
||||
hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(0, data_max), density=True)
|
||||
# For the sake of high efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode, If it exceeds the
|
||||
# largest size, turn to use the default bins config.
|
||||
largest_bin_size = 1024
|
||||
if hist.shape[0] > largest_bin_size:
|
||||
hist, bin_edges = np.histogram(np.abs(data), range=(0, data_max), density=True)
|
||||
sum_ = np.sum(hist)
|
||||
if sum_ == 0:
|
||||
hist = 0
|
||||
else:
|
||||
hist = hist / sum_
|
||||
cumsum = np.cumsum(hist)
|
||||
bit_pow_range = pow(2, int(bitwidth.num_bits) - 1)
|
||||
threshold = []
|
||||
scaling_factor = []
|
||||
kl = []
|
||||
if bit_pow_range + 1 > len(bin_edges) - 1:
|
||||
th_layer_out = bin_edges[-1]
|
||||
return float(th_layer_out)
|
||||
for i in range(bit_pow_range + 1, len(bin_edges), 1):
|
||||
threshold_tmp = (i + 0.5) * (bin_edges[1] - bin_edges[0])
|
||||
threshold = np.concatenate((threshold, [threshold_tmp]))
|
||||
scaling_factor_tmp = threshold_tmp / (bit_pow_range - 1)
|
||||
scaling_factor = np.concatenate((scaling_factor, [scaling_factor_tmp]))
|
||||
# forward interpolation
|
||||
cumsum_tmp = np.copy(cumsum)
|
||||
cumsum_tmp[(i - 1):] = 1
|
||||
fwd_x = np.linspace(0.0, 1.0, bit_pow_range)
|
||||
fwd_xp = np.linspace(0.0, 1.0, i)
|
||||
fwd_fp = cumsum_tmp[:i]
|
||||
forward_interp = np.interp(fwd_x, fwd_xp, fwd_fp)
|
||||
# backward interpolation
|
||||
bwd_x = np.linspace(0.0, 1.0, i)
|
||||
bwd_xp = np.linspace(0.0, 1.0, bit_pow_range)
|
||||
bwd_fp = forward_interp
|
||||
backward_interp = np.interp(bwd_x, bwd_xp, bwd_fp)
|
||||
cumsum_tmp[:i] = backward_interp
|
||||
if 0 in cumsum_tmp:
|
||||
raise ValueError("Zero exist in `cumsum_tmp` which will lead to divide zero error")
|
||||
kl_tmp = np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)) # Kullback-Leibler-J
|
||||
kl = np.concatenate((kl, [kl_tmp]))
|
||||
th_layer_out = threshold[np.argmin(kl)]
|
||||
threshold = float(th_layer_out)
|
||||
if threshold < 1e-5:
|
||||
threshold = 1e-5
|
||||
return threshold
|
||||
|
||||
|
||||
def query_quant_layers(network):
    r"""
    Report the quantization dtype of every fake-quant layer in the network.

    Every cell of type `nn.FakeQuantWithMinMaxObserver` is reported through
    `logger.info` as its cell name followed by its `quant_dtype`. The layers are
    queried before graph compile optimization in the graph mode, so some redundant
    quantized layers, which do not exist in practical execution, may appear.

    Args:
        network (Cell): input network

    Examples:
        >>> from mindspore.compression.quant import QuantizationAwareTraining
        >>> from mindspore.compression.quant.quant_utils import query_quant_layers
        >>> # LeNet5 is a network built from `nn.Conv2dBnAct` and `nn.DenseBnAct` cells.
        >>> net = LeNet5()
        >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
        >>> net_qat = quantizer.quantize(net)
        >>> query_quant_layers(net_qat)
        conv1.conv.fake_quant_weight                                       INT8
        conv1.activation.fake_quant_act                                    INT8
        conv2.conv.fake_quant_weight                                       INT8
        conv2.activation.fake_quant_act                                    INT8
        fc1.dense.fake_quant_weight                                        INT8
        fc1.activation.fake_quant_act                                      INT8
        fc2.dense.fake_quant_weight                                        INT8
        fc2.activation.fake_quant_act                                      INT8
        fc3.dense.fake_quant_weight                                        INT8
        fc3.activation.fake_quant_act                                      INT8
    """
    network = Validator.check_isinstance("network", network, nn.Cell)
    row_format = "{0:60}\t{1:10}"
    for cell_name, cell in network.cells_and_names():
        if not isinstance(cell, nn.FakeQuantWithMinMaxObserver):
            continue
        logger.info(row_format.format(cell_name, cell.quant_dtype))
|
||||
|
||||
|
||||
def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None):
    r"""
    Load fp32 model parameters into quantization model.

    Parameters are matched by the last component of their dotted name: for each such
    suffix the checkpoint entries are consumed in order, one per model parameter with
    the same suffix. Afterwards, cells whose weight fake-quant runs in `LEARNED_SCALE`
    mode have their min/max re-initialised from a KL threshold of the loaded weight.

    Args:
        quant_model(Cell): Quantization model.
        params_dict(dict): Parameter dict that stores fp32 parameters.
        quant_new_params(list): Parameters that exist in quantization network but not in non-quantization
            network. Default: None, which is treated as an empty list.

    Raises:
        TypeError: If `quant_new_params` is not None and is not list.
        ValueError: If there are parameters in the `quant_model` that are neither in `params_dict`
            nor in `quant_new_params`.

    Examples:
        >>> import mindspore as ms
        >>> from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
        >>> # LeNet5 is a network built from `nn.Conv2dBnAct` and `nn.DenseBnAct` cells.
        >>> net = LeNet5()
        >>> ckpt_file_name = "./checkpoint/LeNet5_noquant-1_32.ckpt"
        >>> param_dict = ms.load_checkpoint(ckpt_file_name)
        >>> load_nonquant_param_into_quant_net(net, param_dict)
    """
    if quant_new_params is not None and not isinstance(quant_new_params, list):
        raise TypeError("quant_new_params must be list or None.")
    # With the default None a membership test would raise TypeError instead of the
    # documented ValueError, so normalise it to an empty list first.
    allowed_new_params = quant_new_params if quant_new_params is not None else []

    def _entries_ending_with(suffix):
        """Iterator over checkpoint items whose name ends with `suffix`, in dict order."""
        return iter([item for item in params_dict.items() if item[0].endswith(suffix)])

    # These suffixes are always registered, even when the checkpoint has no such entry,
    # so that quantization-only parameters with these names never trigger the ValueError below.
    iterable_dict = {suffix: _entries_ending_with(suffix) for suffix in ('minq', 'maxq', 'quant_max')}
    for ckpt_name in params_dict:
        key_name = ckpt_name.split(".")[-1]
        if key_name not in iterable_dict:
            iterable_dict[key_name] = _entries_ending_with(key_name)

    for name, param in quant_model.parameters_and_names():
        key_name = name.split(".")[-1]
        if key_name not in iterable_dict:
            if key_name not in allowed_new_params:
                raise ValueError(f"Can't find match parameter in ckpt, param name = {name}")
            continue
        value_param = next(iterable_dict[key_name], None)
        if value_param:
            param.set_data(value_param[1].data)
            logger.info(f'init model param {name} with checkpoint param {value_param[0]}')

    # Perform KL_init when learned scale quantization is executed.
    for cell_and_name in quant_model.cells_and_names():
        cell = cell_and_name[1]
        if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant,
                             nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE":
            subcell_weight_para = cell.weight.data.asnumpy()
            if hasattr(cell, 'gamma'):
                # Apply the batchnorm scaling to the weight before measuring its range.
                scale_factor = (cell.gamma.data.asnumpy() /
                                np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5))
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)

            quant_dtype = cell.fake_quant_weight.quant_dtype
            if cell.fake_quant_weight.per_channel:
                # One threshold per slice along the first axis of the weight.
                max_init = [compute_kl_threshold(weight_para_each, quant_dtype)
                            for weight_para_each in subcell_weight_para]
            else:
                max_init = [compute_kl_threshold(subcell_weight_para, quant_dtype)]
            min_init = [-x for x in max_init]

            cell.fake_quant_weight.reset(quant_dtype=quant_dtype,
                                         min_init=min_init, max_init=max_init)
|
|
@ -1,68 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Note:
|
||||
Base Class of Quantizer. This is interface that is subject to change or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
|
||||
from mindspore._checkparam import Validator
|
||||
|
||||
__all__ = ["OptimizeOption"]
|
||||
|
||||
|
||||
class OptimizeOption(Enum):
    r"""
    Optimize options for model quantization; only `QAT` and `LEARNED_SCALE` are supported.

    Converting a member with `str()` yields its plain value, e.g. "QAT".
    """
    # Quantization aware training.
    QAT = "QAT"

    # Learned scale quantization.
    LEARNED_SCALE = "LEARNED_SCALE"

    def __str__(self):
        text = self.value
        return str(text)
|
||||
|
||||
|
||||
class Quantizer(ABC):
    """
    Abstract base class of quantizers; subclasses implement `quantize` to produce
    their own quantization result.

    Args:
        optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options.
            A single option is wrapped into a one-element list. Default: OptimizeOption.QAT.
    """
    def __init__(self,
                 optimize_option=OptimizeOption.QAT):
        if not isinstance(optimize_option, (list, tuple)):
            optimize_option = [optimize_option]
        # Every entry must be an OptimizeOption member.
        for candidate in optimize_option:
            Validator.check_isinstance("optimize_option", candidate, OptimizeOption)
        self.optimize_option = optimize_option

    @abstractmethod
    def quantize(self, network):
        """
        Quant API to convert input network to a quantization aware training network.

        Args:
            network (Cell): network to be quantized.
        """
|
|
@ -20,7 +20,7 @@ The high-level components(Cells) used to construct the neural network.
|
|||
from __future__ import absolute_import
|
||||
|
||||
from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \
|
||||
image, quant, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
|
||||
image, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
|
||||
from mindspore.nn.layer.activation import *
|
||||
from mindspore.nn.layer.normalization import *
|
||||
from mindspore.nn.layer.container import *
|
||||
|
@ -32,7 +32,6 @@ from mindspore.nn.layer.basic import *
|
|||
from mindspore.nn.layer.embedding import *
|
||||
from mindspore.nn.layer.pooling import *
|
||||
from mindspore.nn.layer.image import *
|
||||
from mindspore.nn.layer.quant import *
|
||||
from mindspore.nn.layer.math import *
|
||||
from mindspore.nn.layer.combined import *
|
||||
from mindspore.nn.layer.timedistributed import *
|
||||
|
@ -53,7 +52,6 @@ __all__.extend(basic.__all__)
|
|||
__all__.extend(embedding.__all__)
|
||||
__all__.extend(pooling.__all__)
|
||||
__all__.extend(image.__all__)
|
||||
__all__.extend(quant.__all__)
|
||||
__all__.extend(math.__all__)
|
||||
__all__.extend(combined.__all__)
|
||||
__all__.extend(timedistributed.__all__)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,7 +25,6 @@ import stat
|
|||
import threading
|
||||
from threading import Thread, Lock
|
||||
from collections import defaultdict, OrderedDict
|
||||
from functools import wraps
|
||||
from io import BytesIO
|
||||
|
||||
import math
|
||||
|
@ -52,7 +51,6 @@ from mindspore.common.parameter import Parameter
|
|||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.common._utils import is_shape_unknown
|
||||
from mindspore.communication.management import get_rank, get_group_size
|
||||
from mindspore.compression.export import quant_export
|
||||
from mindspore.experimental import MapParameter
|
||||
from mindspore.parallel._cell_wrapper import get_allgather_cell
|
||||
from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index
|
||||
|
@ -1123,12 +1121,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
|
|||
|
||||
kwargs (dict): Configuration options dictionary.
|
||||
|
||||
- quant_mode (str): If the network is a quantization aware training network, the quant_mode should
|
||||
be set to "QUANT", else the quant_mode should be set to "NONQUANT".
|
||||
- mean (float): The mean of input data after preprocessing, used for quantizing the first layer of network.
|
||||
Default: 127.5.
|
||||
- std_dev (float): The variance of input data after preprocessing,
|
||||
used for quantizing the first layer of the network. Default: 127.5.
|
||||
- enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32.
|
||||
- enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set.
|
||||
|
||||
|
@ -1192,7 +1184,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
|
|||
inputs = tuple(inputs_col)
|
||||
|
||||
file_name = os.path.realpath(file_name)
|
||||
net = _quant_export(net, *inputs, file_format=file_format, **kwargs)
|
||||
if 'enc_key' in kwargs.keys():
|
||||
kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs)
|
||||
_export(net, file_name, file_format, *inputs, **kwargs)
|
||||
|
@ -1560,62 +1551,6 @@ def _save_dataset_to_mindir(model, dataset):
|
|||
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False
|
||||
|
||||
|
||||
def quant_mode_manage(func):
    """
    Decorator that keeps the `quant_mode` values of old versions working.

    Without a `quant_mode` keyword the wrapped function is not called and the network
    is returned unchanged. The legacy values 'AUTO' and 'MANUAL' are rewritten to
    'QUANT' before the wrapped function is called.
    """

    @wraps(func)
    def wrapper(network, *inputs, file_format, **kwargs):
        try:
            mode = kwargs['quant_mode']
        except KeyError:
            # No quantization requested at all.
            return network
        if not isinstance(mode, str):
            raise TypeError("For 'export', the type of 'quant_mode' should be string, "
                            "but got {}.".format(type(mode)))
        if mode == 'AUTO' or mode == 'MANUAL':
            kwargs['quant_mode'] = 'QUANT'
        return func(network, *inputs, file_format=file_format, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
@quant_mode_manage
def _quant_export(network, *inputs, file_format, **kwargs):
    """
    Exports MindSpore quantization predict model to deploy with AIR and MINDIR.

    For 'NONQUANT' the network is returned untouched. For 'QUANT' a deep copy of the
    network is converted by `quant_export.ExportToQuantInferNetwork` and the resulting
    network is returned; the `mean` and `std_dev` keyword arguments default to 127.5.

    Raises:
        KeyError: If `quant_mode` is not 'QUANT' or 'NONQUANT', or the device target is
            not supported.
        ValueError: If `file_format` is neither 'AIR' nor 'MINDIR'.
    """
    supported_device = ["Ascend", "GPU"]
    supported_formats = ['AIR', 'MINDIR']
    quant_mode_formats = ['QUANT', 'NONQUANT']

    quant_mode = kwargs['quant_mode']
    if quant_mode not in quant_mode_formats:
        raise KeyError(f"For 'export', the argument 'quant_mode' must be one of {quant_mode_formats}, "
                       f"but got {quant_mode}.")
    if quant_mode == 'NONQUANT':
        return network

    # Work on a copy so the caller's network is left as it is.
    quant_net = copy.deepcopy(network)
    quant_net._create_time = int(time.time() * 1e9)

    mean = kwargs.get('mean')
    if mean is None:
        mean = 127.5
    std_dev = kwargs.get('std_dev')
    if std_dev is None:
        std_dev = 127.5
    mean = Validator.check_value_type("mean", mean, (int, float))
    std_dev = Validator.check_value_type("std_dev", std_dev, (int, float))

    if context.get_context('device_target') not in supported_device:
        raise KeyError(f"For 'export', quant export only support {supported_device} device target now, "
                       f"but got {context.get_context('device_target')}")

    if file_format not in supported_formats:
        raise ValueError(f"For 'export', quant export only support 'file_format' {supported_formats}, "
                         f"but got {file_format}.")

    quant_net.set_train(False)
    # Only the MINDIR path passes `is_mindir`; AIR relies on the exporter's default.
    exporter_options = {'is_mindir': True} if file_format == "MINDIR" else {}
    exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs, **exporter_options)
    return exporter.run()
|
||||
|
||||
|
||||
def parse_print(print_file_name):
|
||||
"""
|
||||
Parse data file generated by mindspore.ops.Print.
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
network config setting, will be used in test_lenet_quant.py
|
||||
"""
|
||||
|
||||
from easydict import EasyDict as edict
|
||||
|
||||
# Settings for the LeNet quantization test, exposed with attribute-style access.
quant_cfg = edict(
    num_classes=10,
    lr=0.01,
    momentum=0.9,
    epoch_size=10,
    batch_size=64,
    buffer_size=1000,
    image_height=32,
    image_width=32,
    keep_checkpoint_max=10,
)
|
|
@ -1,60 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Produce the dataset
|
||||
"""
|
||||
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.vision as CV
|
||||
import mindspore.dataset.transforms as C
|
||||
from mindspore.dataset.vision import Inter
|
||||
from mindspore.common import dtype as mstype
|
||||
|
||||
|
||||
def create_dataset(data_path, batch_size=32, repeat_size=1,
                   num_parallel_workers=1):
    """
    Build the MNIST pipeline used for training or testing.

    Labels are cast to int32; images are resized to 32x32, rescaled, normalised and
    converted from HWC to CHW. The result is shuffled, batched (dropping the last
    incomplete batch) and repeated.

    Args:
        data_path (str): Location handed to `ds.MnistDataset`.
        batch_size (int): Number of samples per batch. Default: 32.
        repeat_size (int): Number of times the dataset is repeated. Default: 1.
        num_parallel_workers (int): Workers used by every map operation. Default: 1.

    Returns:
        The configured dataset object.
    """
    # define dataset
    dataset = ds.MnistDataset(data_path)

    target_size = (32, 32)
    # Presumably the usual MNIST mean (0.1307) and std (0.3081) -- confirm.
    normalize_scale = 1 / 0.3081
    normalize_shift = -1 * 0.1307 / 0.3081

    # (operation, column) pairs, applied in exactly this order.
    pipeline = [
        (C.TypeCast(mstype.int32), "label"),
        (CV.Resize(target_size, interpolation=Inter.LINEAR), "image"),  # Bilinear mode
        (CV.Rescale(1.0 / 255.0, 0.0), "image"),
        (CV.Rescale(normalize_scale, normalize_shift), "image"),
        (CV.HWC2CHW(), "image"),
    ]
    for operation, column in pipeline:
        dataset = dataset.map(operations=operation, input_columns=column,
                              num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    dataset = dataset.shuffle(buffer_size=10000)  # 10000 as in LeNet train script
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset.repeat(repeat_size)
|
|
@ -1,58 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""LeNet."""
|
||||
import mindspore.nn as nn
|
||||
|
||||
|
||||
class LeNet5(nn.Cell):
    """
    LeNet-5 assembled from fused cells (`nn.Conv2dBnAct` / `nn.DenseBnAct`).

    Args:
        num_class (int): Number of output classes. Default: 10.
        channel (int): Number of channels of the input image. Default: 1.

    Returns:
        Tensor, output tensor

    Examples:
        >>> LeNet5(num_class=10)
    """

    def __init__(self, num_class=10, channel=1):
        super().__init__()
        self.type = "fusion"
        self.num_class = num_class

        # Fused convolution cells take the place of plain `nn.Conv2d`.
        self.conv1 = nn.Conv2dBnAct(in_channels=channel, out_channels=6, kernel_size=5,
                                    pad_mode='valid', activation='relu')
        self.conv2 = nn.Conv2dBnAct(in_channels=6, out_channels=16, kernel_size=5,
                                    pad_mode='valid', activation='relu')
        # Fused dense cells take the place of plain `nn.Dense`.
        self.fc1 = nn.DenseBnAct(in_channels=16 * 5 * 5, out_channels=120, activation='relu')
        self.fc2 = nn.DenseBnAct(in_channels=120, out_channels=84, activation='relu')
        # The classifier layer has no activation.
        self.fc3 = nn.DenseBnAct(in_channels=84, out_channels=num_class)

        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

    def construct(self, x):
        # Two conv + pool stages, then flatten and three dense layers.
        pooled = self.max_pool2d(self.conv1(x))
        pooled = self.max_pool2d(self.conv2(pooled))
        hidden = self.fc2(self.fc1(self.flatten(pooled)))
        return self.fc3(hidden)
|
|
@ -1,199 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
train and infer lenet quantization network
|
||||
"""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore.common import dtype as mstype
|
||||
import mindspore.nn as nn
|
||||
from mindspore.train.metrics import Accuracy
|
||||
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
|
||||
from mindspore import load_checkpoint, load_param_into_net, export
|
||||
from mindspore.train import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore.compression.quant.quantizer import OptimizeOption
|
||||
from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
|
||||
from dataset import create_dataset
|
||||
from config import quant_cfg
|
||||
from lenet_fusion import LeNet5 as LeNet5Fusion
|
||||
import numpy as np
|
||||
|
||||
# Root directory of the MNIST data; the functions below join it with "train" or "test".
data_path = "/home/workspace/mindspore_dataset/mnist"
|
||||
# Checkpoint that train_lenet_quant() loads into the fusion network before quantization.
lenet_ckpt_path = "/home/workspace/mindspore_dataset/checkpoint/lenet/ckpt_lenet_noquant-10_1875.ckpt"
|
||||
|
||||
def train_lenet_quant(optim_option="QAT"):
    """
    Train the LeNet5 fusion network with quantization aware training.

    The checkpoint at `lenet_ckpt_path` is loaded into the fusion network,
    the network is converted by `QuantizationAwareTraining`, and the result
    is trained on the "train" split under `data_path`. A checkpoint whose
    prefix is "ckpt_lenet_quant" + `optim_option` is saved by the callback.

    Args:
        optim_option (str): "LEARNED_SCALE" selects the learned-scale quantizer
            settings; any other value selects the default settings. Default: "QAT".
    """
    cfg = quant_cfg
    train_set = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1)
    steps_per_epoch = train_set.get_dataset_size()

    # Build the fusion network and load the checkpoint into it.
    network = LeNet5Fusion(cfg.num_classes)
    load_nonquant_param_into_quant_net(network, load_checkpoint(lenet_ckpt_path))

    # Pick the quantizer settings, then convert the fusion network.
    if optim_option == "LEARNED_SCALE":
        quant_kwargs = dict(bn_fold=False,
                            per_channel=[True, False],
                            symmetric=[True, True],
                            narrow_range=[True, True],
                            freeze_bn=0,
                            quant_delay=0,
                            one_conv_fold=True,
                            optimize_option=OptimizeOption.LEARNED_SCALE)
    else:
        quant_kwargs = dict(quant_delay=900,
                            bn_fold=False,
                            per_channel=[True, False],
                            symmetric=[True, False])
    network = QuantizationAwareTraining(**quant_kwargs).quantize(network)

    # Loss and optimizer.
    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    optimizer = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

    # One checkpoint after `epoch_size` epochs worth of steps.
    ckpt_config = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * steps_per_epoch,
                                   keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_cb = ModelCheckpoint(prefix="ckpt_lenet_quant" + optim_option, config=ckpt_config)

    model = Model(network, loss_fn, optimizer, metrics={"Accuracy": Accuracy()})

    print("============== Starting Training ==============")
    model.train(cfg['epoch_size'], train_set,
                callbacks=[ckpt_cb, LossMonitor()],
                dataset_sink_mode=True)
    print("============== End Training ==============")
|
||||
|
||||
|
||||
def eval_quant(optim_option="QAT"):
    """
    Evaluate the quantization aware LeNet5 and assert its accuracy.

    The network is rebuilt and quantized, the checkpoint
    './ckpt_lenet_quant' + `optim_option` + '-10_937.ckpt' is loaded into it,
    and accuracy on the "test" split under `data_path` must exceed 0.98.

    Args:
        optim_option (str): "LEARNED_SCALE" selects the learned-scale quantizer
            settings; any other value selects the default settings. Default: "QAT".

    Raises:
        ValueError: If `load_param_into_net` reports parameters that were not loaded.
    """
    cfg = quant_cfg
    eval_set = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1)
    ckpt_path = './ckpt_lenet_quant' + optim_option + '-10_937.ckpt'

    # Build the fusion network and convert it to a quantization aware one.
    network = LeNet5Fusion(cfg.num_classes)
    if optim_option == "LEARNED_SCALE":
        quant_kwargs = dict(bn_fold=False,
                            per_channel=[True, False],
                            symmetric=[True, True],
                            narrow_range=[True, True],
                            freeze_bn=0,
                            quant_delay=0,
                            one_conv_fold=True,
                            optimize_option=OptimizeOption.LEARNED_SCALE)
    else:
        quant_kwargs = dict(quant_delay=0,
                            bn_fold=False,
                            freeze_bn=10000,
                            per_channel=[True, False],
                            symmetric=[True, False])
    network = QuantizationAwareTraining(**quant_kwargs).quantize(network)

    # Loss, optimizer and model wrapper.
    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    optimizer = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
    model = Model(network, loss_fn, optimizer, metrics={"Accuracy": Accuracy()})

    # Restore the quantization aware checkpoint; any leftover parameter is an error.
    leftover = load_param_into_net(network, load_checkpoint(ckpt_path))
    if leftover:
        raise ValueError("Load param into net fail!")

    print("============== Starting Testing ==============")
    acc = model.eval(eval_set, dataset_sink_mode=True)
    print("============== {} ==============".format(acc))
    assert acc['Accuracy'] > 0.98
|
||||
|
||||
|
||||
def export_lenet(optim_option="QAT", file_format="MINDIR"):
    """
    Quantize a freshly built LeNet5 fusion network and export it as "lenet_quant".

    Args:
        optim_option (str): "LEARNED_SCALE" selects the learned-scale quantizer
            settings; any other value selects the default settings. Default: "QAT".
        file_format (str): Format passed through to `export`. Default: "MINDIR".
    """
    cfg = quant_cfg

    # Build the fusion network and convert it to a quantization aware one.
    network = LeNet5Fusion(cfg.num_classes)
    if optim_option == "LEARNED_SCALE":
        quant_kwargs = dict(bn_fold=False,
                            per_channel=[True, False],
                            symmetric=[True, True],
                            narrow_range=[True, True],
                            freeze_bn=0,
                            quant_delay=0,
                            one_conv_fold=True,
                            optimize_option=OptimizeOption.LEARNED_SCALE)
    else:
        quant_kwargs = dict(quant_delay=0,
                            bn_fold=False,
                            freeze_bn=10000,
                            per_channel=[True, False],
                            symmetric=[True, False])
    network = QuantizationAwareTraining(**quant_kwargs).quantize(network)

    # Export with an all-ones float32 input of shape (1, 1, height, width).
    input_shape = [1, 1, cfg.image_height, cfg.image_width]
    dummy_input = Tensor(np.ones(input_shape), mstype.float32)
    export(network, dummy_input, file_name="lenet_quant", file_format=file_format, quant_mode='AUTO')
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lenet_quant():
    """
    test_lenet_quant
    Features: test_lenet_quant
    Description: train, evaluate and export the quantized LeNet on GPU in graph mode,
        first with the default option and then with "LEARNED_SCALE"
    Expectation: None
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    # "QAT" is the default option of all three helpers.
    for option in ("QAT", "LEARNED_SCALE"):
        train_lenet_quant(optim_option=option)
        eval_quant(optim_option=option)
        export_lenet(optim_option=option)
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_lenet_quant_ascend():
    """
    test_lenet_quant_ascend
    Features: test_lenet_quant_ascend
    Description: train, evaluate and export (AIR format) the "LEARNED_SCALE"
        quantized LeNet on Ascend in graph mode
    Expectation: None
    """
    option = "LEARNED_SCALE"
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    train_lenet_quant(optim_option=option)
    eval_quant(optim_option=option)
    export_lenet(optim_option=option, file_format="AIR")
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_lenet_quant_ascend_pynative():
    """
    test_lenet_quant_ascend_pynative
    Features: test_lenet_quant_ascend_pynative
    Description: run the "QAT" quantized LeNet training on Ascend in pynative mode
    Expectation: None
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
    option = "QAT"
    train_lenet_quant(optim_option=option)
|
|
@ -1,67 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
""" create train dataset. """
|
||||
|
||||
from functools import partial
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.common.dtype as mstype
|
||||
import mindspore.dataset.vision as C
|
||||
import mindspore.dataset.transforms as C2
|
||||
|
||||
|
||||
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    Build the CIFAR-10 training pipeline.

    Labels are cast to int32. Images are resized to the configured size,
    rescaled by 1/255, normalized per channel and converted from HWC to CHW.
    The result is batched (incomplete batches dropped) and then repeated.

    Args:
        dataset_path(string): the path of dataset.
        config(EasyDict): the basic config for training; `image_height` and
            `image_width` are read from it.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    cifar_ds = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=False)

    # Image transforms, applied in list order; Resize uses its default interpolation.
    image_ops = [
        C.Resize((config.image_height, config.image_width)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        C.HWC2CHW(),
    ]
    label_op = C2.TypeCast(mstype.int32)

    # Map the label column first, then the image column.
    cifar_ds = cifar_ds.map(input_columns="label", operations=label_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=image_ops)

    # Batch, then repeat.
    return cifar_ds.batch(batch_size, drop_remainder=True).repeat(repeat_num)
|
|
@ -1,56 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""learning rate generator"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Generate the per-step learning rate array: linear warmup, then cosine decay.

    Args:
        global_step(int): number of leading steps to drop from the schedule
        lr_init(float): learning rate at the first warmup step
        lr_end(float): learning rate the cosine decay approaches
        lr_max(float): learning rate reached at the end of warmup
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, float32 learning rates for steps `global_step` onwards
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _rate_at(step):
        """Learning rate of a single step, clamped so it is never below zero."""
        if step < warmup_steps:
            rate = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            angle = math.pi * (step - warmup_steps) / (total_steps - warmup_steps)
            rate = lr_end + (lr_max - lr_end) * (1. + math.cos(angle)) / 2.
        if rate < 0.0:
            rate = 0.0
        return rate

    schedule = np.array([_rate_at(step) for step in range(total_steps)]).astype(np.float32)
    return schedule[global_step:]
|
|
@ -1,263 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""MobileNetV2 Quant model define"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore.nn as nn
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore import Tensor
|
||||
|
||||
__all__ = ['mobilenetV2']
|
||||
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
# Make sure that round down does not go down by more than 10%.
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
|
||||
class GlobalAvgPooling(nn.Cell):
    """
    Global average pooling: mean over axes 2 and 3, which are dropped from the result.

    Args:

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super().__init__()
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        # Axes (2, 3) are reduced; keep_dims=False removes them.
        return self.mean(x, (2, 3))
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Cell):
    """
    Convolution (or depthwise convolution) fused with BatchNorm and ReLU.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Kernel size. Default: 3.
        stride (int): Stride of the convolution. Default: 1.
        groups (int): Channel group. 1 for a regular convolution, the input
            channel count for a depthwise one. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super().__init__()
        # Explicit padding of (kernel_size - 1) // 2 on the fused cell.
        conv_options = dict(stride=stride,
                            pad_mode='pad',
                            padding=(kernel_size - 1) // 2,
                            group=groups,
                            has_bn=True,
                            activation='relu')
        self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size, **conv_options)

    def construct(self, x):
        return self.conv(x)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Cell):
    """
    MobileNetV2 inverted residual block.

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride of the depthwise convolution; must be 1 or 2.
        expand_ratio (int): Expansion ratio applied to the input channel.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> InvertedResidual(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        assert stride in [1, 2]

        expanded = int(round(inp * expand_ratio))
        # The skip connection is only used when input and output shapes match.
        self.use_res_connect = stride == 1 and inp == oup

        # Optional 1x1 expansion, only when the block actually expands.
        stages = [ConvBNReLU(inp, expanded, kernel_size=1)] if expand_ratio != 1 else []
        # Depthwise convolution.
        stages.append(ConvBNReLU(expanded, expanded, stride=stride, groups=expanded))
        # Pointwise linear projection (no activation).
        stages.append(nn.Conv2dBnAct(expanded, oup, kernel_size=1, stride=1,
                                     pad_mode='pad', padding=0, group=1, has_bn=True))
        self.conv = nn.SequentialCell(stages)
        self.add = P.Add()

    def construct(self, x):
        out = self.conv(x)
        if not self.use_res_connect:
            return out
        return self.add(out, x)
|
||||
|
||||
|
||||
class mobilenetV2(nn.Cell):
    """
    MobileNetV2 fusion architecture.

    Args:
        num_classes (int): Number of classes. Default: 1000.
        width_mult (float): Channel multiplier; resulting channel counts are
            rounded to a multiple of `round_nearest`. Default: 1.
        has_dropout (bool): Whether a Dropout cell precedes the classifier. Default: False.
        inverted_residual_setting (list): Rows of [t, c, n, s] describing the
            inverted residual stages. Default: None, meaning the built-in table.
        round_nearest (int): Channel counts are rounded to a multiple of this. Default: 8.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> mobilenetV2(num_classes=1000)
    """

    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super().__init__()

        # Inverted residual stages: expansion t, channels c, repeats n, first stride s.
        if inverted_residual_setting is None:
            inverted_residual_setting = [
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
        self.cfgs = inverted_residual_setting

        # Stem and final feature widths, scaled by width_mult.
        in_ch = _make_divisible(32 * width_mult, round_nearest)
        self.out_channels = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

        # Stem convolution followed by the inverted residual stages.
        stages = [ConvBNReLU(3, in_ch, stride=2)]
        for expand, channels, repeats, first_stride in self.cfgs:
            out_ch = _make_divisible(channels * width_mult, round_nearest)
            for idx in range(repeats):
                # Only the first block of a stage uses the configured stride.
                block_stride = first_stride if idx == 0 else 1
                stages.append(InvertedResidual(in_ch, out_ch, block_stride, expand_ratio=expand))
                in_ch = out_ch
        # Final 1x1 convolution up to the feature width.
        stages.append(ConvBNReLU(in_ch, self.out_channels, kernel_size=1))
        self.features = nn.SequentialCell(stages)

        # Head: global pooling, optional dropout, classifier.
        head_cells = [GlobalAvgPooling()]
        if has_dropout:
            head_cells.append(nn.Dropout(0.2))
        head_cells.append(nn.DenseBnAct(self.out_channels, num_classes,
                                        has_bias=True, has_bn=False))
        self.head = nn.SequentialCell(head_cells)

        # init weights
        self.init_parameters_data()
        self._initialize_weights()

    def construct(self, x):
        return self.head(self.features(x))

    def _initialize_weights(self):
        """
        Initialize weights.

        Convolution weights are drawn from a zero-mean normal with standard
        deviation sqrt(2 / (kernel area * out_channels)), dense weights from
        N(0, 0.01); biases and BatchNorm beta are zeroed, BatchNorm gamma is
        set to one. The NumPy seed is reset to 1 before every visited cell.

        Args:

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """

        def _zero(param):
            param.set_data(Tensor(np.zeros(param.data.shape, dtype="float32")))

        def _init_conv(conv):
            fan_out = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
            weight = np.random.normal(0, np.sqrt(2. / fan_out), conv.weight.data.shape)
            conv.weight.set_data(Tensor(weight.astype("float32")))
            if conv.bias is not None:
                _zero(conv.bias)

        def _init_dense(dense):
            weight = np.random.normal(0, 0.01, dense.weight.data.shape)
            dense.weight.set_data(Tensor(weight.astype("float32")))
            if dense.bias is not None:
                _zero(dense.bias)

        self.init_parameters_data()
        for _, cell in self.cells_and_names():
            # Re-seed per cell so every draw starts from the same random state.
            np.random.seed(1)
            if isinstance(cell, nn.Conv2d):
                _init_conv(cell)
            elif isinstance(cell, nn.Conv2dBnAct):
                _init_conv(cell.conv)
            elif isinstance(cell, nn.BatchNorm2d):
                cell.gamma.set_data(Tensor(np.ones(cell.gamma.data.shape, dtype="float32")))
                _zero(cell.beta)
            elif isinstance(cell, nn.Dense):
                _init_dense(cell)
            elif isinstance(cell, nn.DenseBnAct):
                _init_dense(cell.dense)
|
|
@ -1,136 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Train Mobilenetv2_quant on Cifar10"""
|
||||
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore.common import set_seed
|
||||
|
||||
from dataset import create_dataset
|
||||
from lr_generator import get_lr
|
||||
from utils import Monitor, CrossEntropyWithLabelSmooth
|
||||
from mobilenetV2 import mobilenetV2
|
||||
|
||||
# Training configuration of the MobileNetV2 quantization test.
config_ascend_quant = ed(dict(
    # dataset / input
    num_classes=10,
    image_height=224,
    image_width=224,
    batch_size=200,
    step_threshold=10,
    data_load_mode="mindata",
    # schedule
    epoch_size=1,
    start_epoch=200,
    warmup_epochs=1,
    # optimizer and loss
    lr=0.3,
    momentum=0.9,
    weight_decay=4e-5,
    label_smooth=0.1,
    loss_scale=1024,
    # checkpointing
    save_checkpoint=True,
    save_checkpoint_epochs=1,
    keep_checkpoint_max=300,
    save_checkpoint_path="./checkpoint",
))

# Location of the CIFAR-10 binary files read by create_dataset().
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
|
||||
|
||||
|
||||
def train():
    """
    Train the quantized MobileNetV2 for the configured epochs on CIFAR-10.

    After training, asserts that `monitor.step_mseconds` is below 650 and that
    the mean of the recorded step losses is below 2.32.
    """
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epochs = config.epoch_size

    # Network.
    network = mobilenetV2(num_classes=config.num_classes)

    # Loss: label smoothing when configured, plain softmax cross entropy otherwise.
    use_label_smooth = config.label_smooth > 0
    if use_label_smooth:
        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                           num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # Dataset.
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    steps_per_epoch = dataset.get_dataset_size()

    # Convert the fusion network to a quantization aware network.
    network = QuantizationAwareTraining(bn_fold=True,
                                        per_channel=[True, False],
                                        symmetric=[True, False]).quantize(network)

    # Learning rate schedule, starting `start_epoch` epochs into the curve.
    schedule = get_lr(global_step=config.start_epoch * steps_per_epoch,
                      lr_init=0,
                      lr_end=0,
                      lr_max=config.lr,
                      warmup_epochs=config.warmup_epochs,
                      total_epochs=epochs + config.start_epoch,
                      steps_per_epoch=steps_per_epoch)
    lr = Tensor(schedule)

    # Optimizer over the parameters that require gradients.
    trainable = [param for param in network.get_parameters() if param.requires_grad]
    opt = nn.Momentum(trainable, lr, config.momentum, config.weight_decay)

    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(), step_threshold=config.step_threshold)
    model.train(epochs, dataset, callbacks=[monitor], dataset_sink_mode=False)
    print("============== End Training ==============")

    # Step time check first, then the loss check.
    export_time_used = 650
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    assert train_time < export_time_used

    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    test_mobilenetv2_quant
    Features: test_mobilenetv2_quant
    Description: seed the run, select Ascend graph mode and execute train()
    Expectation: None
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    train()


if __name__ == '__main__':
    test_mobilenetv2_quant()
|
|
@ -1,121 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Train Mobilenetv2_quant gpu on Cifar10"""
|
||||
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore.common import set_seed
|
||||
|
||||
from dataset import create_dataset
|
||||
from lr_generator import get_lr
|
||||
from utils import Monitor, CrossEntropyWithLabelSmooth
|
||||
from mobilenetV2 import mobilenetV2
|
||||
|
||||
# Training configuration of the MobileNetV2 quantization test in this (GPU) file.
config_ascend_quant = ed(dict(
    # dataset / input
    num_classes=10,
    image_height=224,
    image_width=224,
    batch_size=300,
    step_threshold=10,
    data_load_mode="mindata",
    # schedule
    epoch_size=1,
    start_epoch=200,
    warmup_epochs=1,
    # optimizer and loss
    lr=0.05,
    momentum=0.997,
    weight_decay=4e-5,
    label_smooth=0.1,
    loss_scale=1024,
    # checkpointing
    save_checkpoint=True,
    save_checkpoint_epochs=1,
    keep_checkpoint_max=300,
    save_checkpoint_path="./checkpoint",
))

# Location of the CIFAR-10 binary files read by create_dataset().
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
|
||||
|
||||
@pytest.mark.level2
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    test_mobilenetv2_quant
    Features: test_mobilenetv2_quant
    Description: train the quantized MobileNetV2 on CIFAR-10 on GPU in graph mode, then
        assert the mean step loss is below 2.32 and `monitor.step_mseconds` is below 960
    Expectation: None
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epochs = config.epoch_size

    # Network.
    network = mobilenetV2(num_classes=config.num_classes)

    # Loss: label smoothing when configured, plain softmax cross entropy otherwise.
    use_label_smooth = config.label_smooth > 0
    if use_label_smooth:
        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                           num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # Dataset.
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    steps_per_epoch = dataset.get_dataset_size()

    # Convert the fusion network to a quantization aware network.
    network = QuantizationAwareTraining(bn_fold=True,
                                        per_channel=[True, False],
                                        symmetric=[False, False]).quantize(network)

    # Learning rate schedule, starting `start_epoch` epochs into the curve.
    schedule = get_lr(global_step=config.start_epoch * steps_per_epoch,
                      lr_init=0,
                      lr_end=0,
                      lr_max=config.lr,
                      warmup_epochs=config.warmup_epochs,
                      total_epochs=epochs + config.start_epoch,
                      steps_per_epoch=steps_per_epoch)
    lr = Tensor(schedule)

    # Optimizer over the parameters that require gradients.
    trainable = [param for param in network.get_parameters() if param.requires_grad]
    opt = nn.Momentum(trainable, lr, config.momentum, config.weight_decay)

    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(), step_threshold=config.step_threshold)
    model.train(epochs, dataset, callbacks=[monitor], dataset_sink_mode=False)
    print("============== End Training ==============")

    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))

    # Loss check first, then the step time check.
    expect_avg_step_loss = 2.32
    assert avg_step_loss < expect_avg_step_loss
    export_time_used = 960
    assert train_time < export_time_used


if __name__ == '__main__':
    test_mobilenetv2_quant()
|
|
@ -1,120 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""MobileNetV2 utils"""
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
from mindspore.train.callback import Callback
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.nn.loss.loss import LossBase
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops import functional as F
|
||||
from mindspore.common import dtype as mstype
|
||||
|
||||
|
||||
class Monitor(Callback):
|
||||
"""
|
||||
Monitor loss and time.
|
||||
|
||||
Args:
|
||||
lr_init (numpy array): train lr
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Examples:
|
||||
>>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy())
|
||||
"""
|
||||
|
||||
def __init__(self, lr_init=None, step_threshold=10):
|
||||
super(Monitor, self).__init__()
|
||||
self.lr_init = lr_init
|
||||
self.lr_init_len = len(lr_init)
|
||||
self.step_threshold = step_threshold
|
||||
self.step_mseconds = 50000
|
||||
|
||||
def epoch_begin(self, run_context):
|
||||
self.losses = []
|
||||
self.epoch_time = time.time()
|
||||
|
||||
def epoch_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
|
||||
epoch_mseconds = (time.time() - self.epoch_time) * 1000
|
||||
per_step_mseconds = epoch_mseconds / cb_params.batch_num
|
||||
print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
|
||||
per_step_mseconds,
|
||||
np.mean(self.losses)))
|
||||
self.epoch_mseconds = epoch_mseconds
|
||||
|
||||
def step_begin(self, run_context):
|
||||
self.step_time = time.time()
|
||||
|
||||
def step_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
step_mseconds = (time.time() - self.step_time) * 1000
|
||||
self.step_mseconds = min(self.step_mseconds, step_mseconds)
|
||||
step_loss = cb_params.net_outputs
|
||||
|
||||
if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
|
||||
step_loss = step_loss[0]
|
||||
if isinstance(step_loss, Tensor):
|
||||
step_loss = np.mean(step_loss.asnumpy())
|
||||
|
||||
self.losses.append(step_loss)
|
||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
|
||||
|
||||
print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
|
||||
cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
|
||||
1, cb_params.batch_num, step_loss,
|
||||
np.mean(self.losses), self.step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))
|
||||
|
||||
if cb_params.cur_step_num == self.step_threshold:
|
||||
run_context.request_stop()
|
||||
|
||||
|
||||
class CrossEntropyWithLabelSmooth(LossBase):
|
||||
"""
|
||||
CrossEntropyWith LabelSmooth.
|
||||
|
||||
Args:
|
||||
smooth_factor (float): smooth factor, default=0.
|
||||
num_classes (int): num classes
|
||||
|
||||
Returns:
|
||||
None.
|
||||
|
||||
Examples:
|
||||
>>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
|
||||
"""
|
||||
|
||||
def __init__(self, smooth_factor=0., num_classes=1000):
|
||||
super(CrossEntropyWithLabelSmooth, self).__init__()
|
||||
self.onehot = P.OneHot()
|
||||
self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
|
||||
self.off_value = Tensor(1.0 * smooth_factor /
|
||||
(num_classes - 1), mstype.float32)
|
||||
self.ce = nn.SoftmaxCrossEntropyWithLogits()
|
||||
self.mean = P.ReduceMean(False)
|
||||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, logit, label):
|
||||
one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1],
|
||||
self.on_value, self.off_value)
|
||||
out_loss = self.ce(logit, one_hot_label)
|
||||
out_loss = self.mean(out_loss, 0)
|
||||
return out_loss
|
|
@ -22,20 +22,18 @@ from mindspore import nn
|
|||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore.common import set_seed
|
||||
from mindspore.compression.quant import create_quant_config
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self, qconfig):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1),
|
||||
pad_mode='valid', quant_config=qconfig)
|
||||
self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), pad_mode='valid')
|
||||
def construct(self, x):
|
||||
return self.conv(x)
|
||||
|
||||
|
||||
def test_conv2d_bn_fold_quant():
|
||||
set_seed(1)
|
||||
quant_config = create_quant_config()
|
||||
network = Net(quant_config)
|
||||
network = Net()
|
||||
inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32))
|
||||
label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32))
|
||||
opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9)
|
||||
|
@ -44,11 +42,13 @@ def test_conv2d_bn_fold_quant():
|
|||
train_network = nn.TrainOneStepCell(net_with_loss, opt)
|
||||
train_network.set_train()
|
||||
out_loss = train_network(inputs, label)
|
||||
print("------------------", out_loss.asnumpy())
|
||||
expect_loss = np.array([0.940427])
|
||||
error = np.array([0.1])
|
||||
diff = out_loss.asnumpy() - expect_loss
|
||||
assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
""" create train dataset. """
|
||||
|
||||
from functools import partial
|
||||
|
||||
import mindspore.common.dtype as mstype
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.transforms as C2
|
||||
import mindspore.dataset.vision as C
|
||||
|
||||
|
||||
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
|
||||
"""
|
||||
create a train dataset
|
||||
|
||||
Args:
|
||||
dataset_path(string): the path of dataset.
|
||||
config(EasyDict):the basic config for training
|
||||
repeat_num(int): the repeat times of dataset. Default: 1.
|
||||
batch_size(int): the batch size of dataset. Default: 32.
|
||||
|
||||
Returns:
|
||||
dataset
|
||||
"""
|
||||
|
||||
load_func = partial(ds.Cifar10Dataset, dataset_path)
|
||||
data_set = load_func(num_parallel_workers=8, shuffle=False)
|
||||
|
||||
resize_height = config.image_height
|
||||
resize_width = config.image_width
|
||||
|
||||
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
|
||||
std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
|
||||
|
||||
# define map operations
|
||||
resize_op = C.Resize((resize_height, resize_width))
|
||||
normalize_op = C.Normalize(mean=mean, std=std)
|
||||
changeswap_op = C.HWC2CHW()
|
||||
c_trans = [resize_op, normalize_op, changeswap_op]
|
||||
|
||||
type_cast_op = C2.TypeCast(mstype.int32)
|
||||
|
||||
data_set = data_set.map(operations=c_trans, input_columns="image",
|
||||
num_parallel_workers=8)
|
||||
data_set = data_set.map(operations=type_cast_op,
|
||||
input_columns="label", num_parallel_workers=8)
|
||||
|
||||
# apply batch operations
|
||||
data_set = data_set.batch(batch_size, drop_remainder=True)
|
||||
|
||||
# apply dataset repeat operation
|
||||
data_set = data_set.repeat(repeat_num)
|
||||
|
||||
return data_set
|
|
@ -1,93 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""learning rate generator"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
|
||||
"""
|
||||
generate learning rate array
|
||||
|
||||
Args:
|
||||
lr_init(float): init learning rate
|
||||
lr_end(float): end learning rate
|
||||
lr_max(float): max learning rate
|
||||
warmup_epochs(int): number of warmup epochs
|
||||
total_epochs(int): total epoch of training
|
||||
steps_per_epoch(int): steps of one epoch
|
||||
lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default
|
||||
|
||||
Returns:
|
||||
np.array, learning rate array
|
||||
"""
|
||||
lr_each_step = []
|
||||
total_steps = steps_per_epoch * total_epochs
|
||||
warmup_steps = steps_per_epoch * warmup_epochs
|
||||
if lr_decay_mode == 'steps':
|
||||
decay_epoch_index = [0.3 * total_steps,
|
||||
0.6 * total_steps, 0.8 * total_steps]
|
||||
for i in range(total_steps):
|
||||
if i < decay_epoch_index[0]:
|
||||
lr = lr_max
|
||||
elif i < decay_epoch_index[1]:
|
||||
lr = lr_max * 0.1
|
||||
elif i < decay_epoch_index[2]:
|
||||
lr = lr_max * 0.01
|
||||
else:
|
||||
lr = lr_max * 0.001
|
||||
lr_each_step.append(lr)
|
||||
elif lr_decay_mode == 'poly':
|
||||
if warmup_steps != 0:
|
||||
inc_each_step = (float(lr_max) - float(lr_init)) / \
|
||||
float(warmup_steps)
|
||||
else:
|
||||
inc_each_step = 0
|
||||
for i in range(total_steps):
|
||||
if i < warmup_steps:
|
||||
lr = float(lr_init) + inc_each_step * float(i)
|
||||
else:
|
||||
base = (1.0 - (float(i) - float(warmup_steps)) /
|
||||
(float(total_steps) - float(warmup_steps)))
|
||||
lr = float(lr_max) * base * base
|
||||
if lr < 0.0:
|
||||
lr = 0.0
|
||||
lr_each_step.append(lr)
|
||||
elif lr_decay_mode == 'cosine':
|
||||
decay_steps = total_steps - warmup_steps
|
||||
for i in range(total_steps):
|
||||
if i < warmup_steps:
|
||||
lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
|
||||
lr = float(lr_init) + lr_inc * (i + 1)
|
||||
else:
|
||||
linear_decay = (total_steps - i) / decay_steps
|
||||
cosine_decay = 0.5 * \
|
||||
(1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps))
|
||||
decayed = linear_decay * cosine_decay + 0.00001
|
||||
lr = lr_max * decayed
|
||||
lr_each_step.append(lr)
|
||||
else:
|
||||
for i in range(total_steps):
|
||||
if i < warmup_steps:
|
||||
lr = lr_init + (lr_max - lr_init) * i / warmup_steps
|
||||
else:
|
||||
lr = lr_max - (lr_max - lr_end) * \
|
||||
(i - warmup_steps) / (total_steps - warmup_steps)
|
||||
lr_each_step.append(lr)
|
||||
|
||||
learning_rate = np.array(lr_each_step).astype(np.float32)
|
||||
|
||||
return learning_rate
|
|
@ -1,346 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""ResNet."""
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
import mindspore.common.initializer as weight_init
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore import Tensor
|
||||
from mindspore.nn import FakeQuantWithMinMaxObserver, Conv2dBnFoldQuant
|
||||
from mindspore.compression.quant import create_quant_config
|
||||
|
||||
_ema_decay = 0.999
|
||||
_symmetric = True
|
||||
_fake = True
|
||||
_per_channel = True
|
||||
_quant_config = create_quant_config(per_channel=(_per_channel, False), symmetric=(_symmetric, False))
|
||||
|
||||
|
||||
def _weight_variable(shape, factor=0.01):
|
||||
init_value = np.random.randn(*shape).astype(np.float32) * factor
|
||||
return Tensor(init_value)
|
||||
|
||||
|
||||
def _conv3x3(in_channel, out_channel, stride=1):
|
||||
weight_shape = (out_channel, in_channel, 3, 3)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Conv2d(in_channel, out_channel,
|
||||
kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
|
||||
|
||||
|
||||
def _conv1x1(in_channel, out_channel, stride=1):
|
||||
weight_shape = (out_channel, in_channel, 1, 1)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Conv2d(in_channel, out_channel,
|
||||
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)
|
||||
|
||||
|
||||
def _conv7x7(in_channel, out_channel, stride=1):
|
||||
weight_shape = (out_channel, in_channel, 7, 7)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Conv2d(in_channel, out_channel,
|
||||
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)
|
||||
|
||||
|
||||
def _bn(channel):
|
||||
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
|
||||
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
|
||||
|
||||
|
||||
def _bn_last(channel):
|
||||
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
|
||||
gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)
|
||||
|
||||
|
||||
def _fc(in_channel, out_channel):
|
||||
weight_shape = (out_channel, in_channel)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Cell):
|
||||
"""
|
||||
Convolution/Depthwise fused with Batchnorm and ReLU block definition.
|
||||
|
||||
Args:
|
||||
in_planes (int): Input channel.
|
||||
out_planes (int): Output channel.
|
||||
kernel_size (int): Input kernel size.
|
||||
stride (int): Stride size for the first convolutional layer. Default: 1.
|
||||
groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1.
|
||||
|
||||
Returns:
|
||||
Tensor, output tensor.
|
||||
|
||||
Examples:
|
||||
>>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
|
||||
"""
|
||||
|
||||
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
|
||||
super(ConvBNReLU, self).__init__()
|
||||
padding = (kernel_size - 1) // 2
|
||||
conv = Conv2dBnFoldQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding,
|
||||
group=groups, fake=_fake, quant_config=_quant_config)
|
||||
layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()]
|
||||
self.features = nn.SequentialCell(layers)
|
||||
|
||||
def construct(self, x):
|
||||
output = self.features(x)
|
||||
return output
|
||||
|
||||
|
||||
class ResidualBlock(nn.Cell):
|
||||
"""
|
||||
ResNet V1 residual block definition.
|
||||
|
||||
Args:
|
||||
in_channel (int): Input channel.
|
||||
out_channel (int): Output channel.
|
||||
stride (int): Stride size for the first convolutional layer. Default: 1.
|
||||
|
||||
Returns:
|
||||
Tensor, output tensor.
|
||||
|
||||
Examples:
|
||||
>>> ResidualBlock(3, 256, stride=2)
|
||||
"""
|
||||
expansion = 4
|
||||
|
||||
def __init__(self,
|
||||
in_channel,
|
||||
out_channel,
|
||||
stride=1):
|
||||
super(ResidualBlock, self).__init__()
|
||||
|
||||
channel = out_channel // self.expansion
|
||||
self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1)
|
||||
self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride)
|
||||
self.conv3 = nn.SequentialCell([Conv2dBnFoldQuant(channel, out_channel, fake=_fake,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1, stride=1, pad_mode='same', padding=0),
|
||||
FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, symmetric=False)
|
||||
]) if _fake else Conv2dBnFoldQuant(channel, out_channel, fake=_fake,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1, stride=1,
|
||||
pad_mode='same', padding=0)
|
||||
|
||||
self.down_sample = False
|
||||
|
||||
if stride != 1 or in_channel != out_channel:
|
||||
self.down_sample = True
|
||||
self.down_sample_layer = None
|
||||
|
||||
if self.down_sample:
|
||||
self.down_sample_layer = nn.SequentialCell([Conv2dBnFoldQuant(in_channel, out_channel,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1, stride=stride,
|
||||
pad_mode='same', padding=0),
|
||||
FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay,
|
||||
symmetric=False)
|
||||
]) if _fake else Conv2dBnFoldQuant(in_channel, out_channel,
|
||||
fake=_fake,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1,
|
||||
stride=stride,
|
||||
pad_mode='same',
|
||||
padding=0)
|
||||
self.add = nn.TensorAddQuant()
|
||||
self.relu = P.ReLU()
|
||||
|
||||
def construct(self, x):
|
||||
identity = x
|
||||
out = self.conv1(x)
|
||||
out = self.conv2(out)
|
||||
out = self.conv3(out)
|
||||
|
||||
if self.down_sample:
|
||||
identity = self.down_sample_layer(identity)
|
||||
|
||||
out = self.add(out, identity)
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class ResNet(nn.Cell):
|
||||
"""
|
||||
ResNet architecture.
|
||||
|
||||
Args:
|
||||
block (Cell): Block for network.
|
||||
layer_nums (list): Numbers of block in different layers.
|
||||
in_channels (list): Input channel in each layer.
|
||||
out_channels (list): Output channel in each layer.
|
||||
strides (list): Stride size in each layer.
|
||||
num_classes (int): The number of classes that the training images are belonging to.
|
||||
Returns:
|
||||
Tensor, output tensor.
|
||||
|
||||
Examples:
|
||||
>>> ResNet(ResidualBlock,
|
||||
>>> [3, 4, 6, 3],
|
||||
>>> [64, 256, 512, 1024],
|
||||
>>> [256, 512, 1024, 2048],
|
||||
>>> [1, 2, 2, 2],
|
||||
>>> 10)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
block,
|
||||
layer_nums,
|
||||
in_channels,
|
||||
out_channels,
|
||||
strides,
|
||||
num_classes):
|
||||
super(ResNet, self).__init__()
|
||||
|
||||
if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
|
||||
raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
|
||||
|
||||
self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
|
||||
|
||||
self.layer1 = self._make_layer(block,
|
||||
layer_nums[0],
|
||||
in_channel=in_channels[0],
|
||||
out_channel=out_channels[0],
|
||||
stride=strides[0])
|
||||
self.layer2 = self._make_layer(block,
|
||||
layer_nums[1],
|
||||
in_channel=in_channels[1],
|
||||
out_channel=out_channels[1],
|
||||
stride=strides[1])
|
||||
self.layer3 = self._make_layer(block,
|
||||
layer_nums[2],
|
||||
in_channel=in_channels[2],
|
||||
out_channel=out_channels[2],
|
||||
stride=strides[2])
|
||||
self.layer4 = self._make_layer(block,
|
||||
layer_nums[3],
|
||||
in_channel=in_channels[3],
|
||||
out_channel=out_channels[3],
|
||||
stride=strides[3])
|
||||
|
||||
self.mean = P.ReduceMean(keep_dims=True)
|
||||
self.flatten = nn.Flatten()
|
||||
self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, quant_config=_quant_config)
|
||||
self.output_fake = nn.FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay)
|
||||
|
||||
# init weights
|
||||
self._initialize_weights()
|
||||
|
||||
def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
|
||||
"""
|
||||
Make stage network of ResNet.
|
||||
|
||||
Args:
|
||||
block (Cell): Resnet block.
|
||||
layer_num (int): Layer number.
|
||||
in_channel (int): Input channel.
|
||||
out_channel (int): Output channel.
|
||||
stride (int): Stride size for the first convolutional layer.
|
||||
|
||||
Returns:
|
||||
SequentialCell, the output layer.
|
||||
|
||||
Examples:
|
||||
>>> _make_layer(ResidualBlock, 3, 128, 256, 2)
|
||||
"""
|
||||
layers = []
|
||||
|
||||
resnet_block = block(in_channel, out_channel, stride=stride)
|
||||
layers.append(resnet_block)
|
||||
|
||||
for _ in range(1, layer_num):
|
||||
resnet_block = block(out_channel, out_channel, stride=1)
|
||||
layers.append(resnet_block)
|
||||
|
||||
return nn.SequentialCell(layers)
|
||||
|
||||
def construct(self, x):
|
||||
x = self.conv1(x)
|
||||
c1 = self.maxpool(x)
|
||||
|
||||
c2 = self.layer1(c1)
|
||||
c3 = self.layer2(c2)
|
||||
c4 = self.layer3(c3)
|
||||
c5 = self.layer4(c4)
|
||||
|
||||
out = self.mean(c5, (2, 3))
|
||||
out = self.flatten(out)
|
||||
out = self.end_point(out)
|
||||
out = self.output_fake(out)
|
||||
return out
|
||||
|
||||
def _initialize_weights(self):
|
||||
|
||||
self.init_parameters_data()
|
||||
for _, m in self.cells_and_names():
|
||||
np.random.seed(1)
|
||||
|
||||
if isinstance(m, nn.Conv2dBnFoldQuant):
|
||||
m.weight.set_data(weight_init.initializer(weight_init.Normal(),
|
||||
m.weight.shape,
|
||||
m.weight.dtype))
|
||||
elif isinstance(m, nn.DenseQuant):
|
||||
m.weight.set_data(weight_init.initializer(weight_init.Normal(),
|
||||
m.weight.shape,
|
||||
m.weight.dtype))
|
||||
elif isinstance(m, nn.Conv2dBnWithoutFoldQuant):
|
||||
m.weight.set_data(weight_init.initializer(weight_init.Normal(),
|
||||
m.weight.shape,
|
||||
m.weight.dtype))
|
||||
|
||||
|
||||
def resnet50_quant(class_num=10):
|
||||
"""
|
||||
Get ResNet50 neural network.
|
||||
|
||||
Args:
|
||||
class_num (int): Class number.
|
||||
|
||||
Returns:
|
||||
Cell, cell instance of ResNet50 neural network.
|
||||
|
||||
Examples:
|
||||
>>> net = resnet50_quant(10)
|
||||
"""
|
||||
return ResNet(ResidualBlock,
|
||||
[3, 4, 6, 3],
|
||||
[64, 256, 512, 1024],
|
||||
[256, 512, 1024, 2048],
|
||||
[1, 2, 2, 2],
|
||||
class_num)
|
||||
|
||||
|
||||
def resnet101_quant(class_num=1001):
|
||||
"""
|
||||
Get ResNet101 neural network.
|
||||
|
||||
Args:
|
||||
class_num (int): Class number.
|
||||
|
||||
Returns:
|
||||
Cell, cell instance of ResNet101 neural network.
|
||||
|
||||
Examples:
|
||||
>>> net = resnet101(1001)
|
||||
"""
|
||||
return ResNet(ResidualBlock,
|
||||
[3, 4, 23, 3],
|
||||
[64, 256, 512, 1024],
|
||||
[256, 512, 1024, 2048],
|
||||
[1, 2, 2, 2],
|
||||
class_num)
|
|
@ -1,131 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Train Resnet50_quant on Cifar10"""
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore.nn.optim.momentum import Momentum
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore import set_seed
|
||||
|
||||
from resnet_quant_manual import resnet50_quant
|
||||
from dataset import create_dataset
|
||||
from lr_generator import get_lr
|
||||
from utils import Monitor, CrossEntropy
|
||||
|
||||
|
||||
config_quant = ed({
|
||||
"class_num": 10,
|
||||
"batch_size": 128,
|
||||
"step_threshold": 20,
|
||||
"loss_scale": 1024,
|
||||
"momentum": 0.9,
|
||||
"weight_decay": 1e-4,
|
||||
"epoch_size": 1,
|
||||
"pretrained_epoch_size": 90,
|
||||
"buffer_size": 1000,
|
||||
"image_height": 224,
|
||||
"image_width": 224,
|
||||
"data_load_mode": "original",
|
||||
"save_checkpoint": True,
|
||||
"save_checkpoint_epochs": 1,
|
||||
"keep_checkpoint_max": 50,
|
||||
"save_checkpoint_path": "./",
|
||||
"warmup_epochs": 0,
|
||||
"lr_decay_mode": "cosine",
|
||||
"use_label_smooth": True,
|
||||
"label_smooth_factor": 0.1,
|
||||
"lr_init": 0,
|
||||
"lr_max": 0.005,
|
||||
})
|
||||
|
||||
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_resnet50_quant():
|
||||
set_seed(1)
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
|
||||
config = config_quant
|
||||
print("training configure: {}".format(config))
|
||||
epoch_size = config.epoch_size
|
||||
|
||||
# define network
|
||||
net = resnet50_quant(class_num=config.class_num)
|
||||
net.set_train(True)
|
||||
|
||||
# define loss
|
||||
if not config.use_label_smooth:
|
||||
config.label_smooth_factor = 0.0
|
||||
loss = CrossEntropy(
|
||||
smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
|
||||
#loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
|
||||
|
||||
# define dataset
|
||||
dataset = create_dataset(dataset_path=dataset_path,
|
||||
config=config,
|
||||
repeat_num=1,
|
||||
batch_size=config.batch_size)
|
||||
step_size = dataset.get_dataset_size()
|
||||
|
||||
# convert fusion network to quantization aware network
|
||||
quantizer = QuantizationAwareTraining(bn_fold=True,
|
||||
per_channel=[True, False],
|
||||
symmetric=[True, False])
|
||||
net = quantizer.quantize(net)
|
||||
|
||||
# get learning rate
|
||||
lr = Tensor(get_lr(lr_init=config.lr_init,
|
||||
lr_end=0.0,
|
||||
lr_max=config.lr_max,
|
||||
warmup_epochs=config.warmup_epochs,
|
||||
total_epochs=config.epoch_size,
|
||||
steps_per_epoch=step_size,
|
||||
lr_decay_mode='cosine'))
|
||||
|
||||
# define optimization
|
||||
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
|
||||
config.weight_decay, config.loss_scale)
|
||||
|
||||
# define model
|
||||
#model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
|
||||
model = Model(net, loss_fn=loss, optimizer=opt)
|
||||
|
||||
print("============== Starting Training ==============")
|
||||
monitor = Monitor(lr_init=lr.asnumpy(),
|
||||
step_threshold=config.step_threshold)
|
||||
|
||||
callbacks = [monitor]
|
||||
model.train(epoch_size, dataset, callbacks=callbacks,
|
||||
dataset_sink_mode=False)
|
||||
print("============== End Training ==============")
|
||||
|
||||
expect_avg_step_loss = 2.60
|
||||
avg_step_loss = np.mean(np.array(monitor.losses))
|
||||
|
||||
print("average step loss:{}".format(avg_step_loss))
|
||||
assert avg_step_loss < expect_avg_step_loss
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_resnet50_quant()
|
|
@ -1,105 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Resnet50 utils"""
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
from mindspore.train.callback import Callback
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.nn.loss.loss import LossBase
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops import functional as F
|
||||
from mindspore.common import dtype as mstype
|
||||
|
||||
|
||||
class Monitor(Callback):
|
||||
"""
|
||||
Monitor loss and time.
|
||||
|
||||
Args:
|
||||
lr_init (numpy array): train lr
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Examples:
|
||||
>>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy())
|
||||
"""
|
||||
|
||||
def __init__(self, lr_init=None, step_threshold=10):
|
||||
super(Monitor, self).__init__()
|
||||
self.lr_init = lr_init
|
||||
self.lr_init_len = len(lr_init)
|
||||
self.step_threshold = step_threshold
|
||||
|
||||
def epoch_begin(self, run_context):
|
||||
self.losses = []
|
||||
self.epoch_time = time.time()
|
||||
|
||||
def epoch_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
|
||||
epoch_mseconds = (time.time() - self.epoch_time) * 1000
|
||||
per_step_mseconds = epoch_mseconds / cb_params.batch_num
|
||||
print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
|
||||
per_step_mseconds,
|
||||
np.mean(self.losses)))
|
||||
self.epoch_mseconds = epoch_mseconds
|
||||
|
||||
def step_begin(self, run_context):
|
||||
self.step_time = time.time()
|
||||
|
||||
def step_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
step_mseconds = (time.time() - self.step_time) * 1000
|
||||
step_loss = cb_params.net_outputs
|
||||
|
||||
if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
|
||||
step_loss = step_loss[0]
|
||||
if isinstance(step_loss, Tensor):
|
||||
step_loss = np.mean(step_loss.asnumpy())
|
||||
|
||||
self.losses.append(step_loss)
|
||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
|
||||
|
||||
print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
|
||||
cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
|
||||
1, cb_params.batch_num, step_loss,
|
||||
np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))
|
||||
|
||||
if cb_params.cur_step_num == self.step_threshold:
|
||||
run_context.request_stop()
|
||||
|
||||
|
||||
class CrossEntropy(LossBase):
|
||||
"""the redefined loss function with SoftmaxCrossEntropyWithLogits"""
|
||||
|
||||
def __init__(self, smooth_factor=0, num_classes=1001):
|
||||
super(CrossEntropy, self).__init__()
|
||||
self.onehot = P.OneHot()
|
||||
self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
|
||||
self.off_value = Tensor(1.0 * smooth_factor /
|
||||
(num_classes - 1), mstype.float32)
|
||||
self.ce = nn.SoftmaxCrossEntropyWithLogits()
|
||||
self.mean = P.ReduceMean(False)
|
||||
|
||||
def construct(self, logit, label):
|
||||
one_hot_label = self.onehot(label, F.shape(
|
||||
logit)[1], self.on_value, self.off_value)
|
||||
loss = self.ce(logit, one_hot_label)
|
||||
loss = self.mean(loss, 0)
|
||||
return loss
|
Loading…
Reference in New Issue