forked from mindspore-Ecosystem/mindspore
!47033 remove compression and testcases in mindspore
Merge pull request !47033 from hangq/wood
commit b6653ab2d6
@@ -289,7 +289,6 @@ install(
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
-        ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
@@ -164,7 +164,6 @@ install(
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
-        ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
@@ -250,7 +250,6 @@ install(
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
-        ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
         ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
@@ -23,9 +23,6 @@ mindspore.export
 
 - **kwargs** (dict) - Configuration options dictionary.
 
-- **quant_mode** (str) - If the network is a quantization-aware training network, `quant_mode` should be set to "QUANT"; otherwise, set `quant_mode` to "NONQUANT".
-- **mean** (float) - The mean of the input data after preprocessing, used for quantizing the first layer of the network. Default: 127.5.
-- **std_dev** (float) - The variance of the input data after preprocessing, used for quantizing the first layer of the network. Default: 127.5.
 - **enc_key** (str) - The byte-type key used for encryption. The valid length is 16, 24, or 32.
 - **enc_mode** (Union[str, function]) - Specifies the encryption mode; enabled when `enc_key` is set.
 
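For reference, a minimal sketch of how these keyword arguments were passed to `mindspore.export` before this change; the quantization-related kwargs (`quant_mode`, `mean`, `std_dev`) are exactly the ones removed here. The tiny `nn.Dense` network and input shape are placeholders, and the call only runs on a MindSpore version that still accepts these options:

import numpy as np
import mindspore as ms
from mindspore import nn

# Placeholder network; a real use case would pass a quantization-aware-trained model
# with quant_mode="QUANT". For an ordinary network, quant_mode was "NONQUANT".
net = nn.Dense(3, 2)
inputs = ms.Tensor(np.ones([1, 3]).astype(np.float32))
ms.export(net, inputs, file_name="net", file_format="MINDIR",
          quant_mode="NONQUANT", mean=127.5, std_dev=127.5)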
@@ -168,8 +168,6 @@ PYBIND11_MODULE(_c_expression, m) {
          "Get the number of parallel operators.")
     .def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"),
          "Get Allreduce Fusion Dictionary.")
-    .def("fetch_info_for_quant_export", &GraphExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"),
-         "Fetch the inputs of Conv or Matmul for quant export.")
     .def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"),
          "Build data graph.")
     .def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"),
@@ -631,122 +631,6 @@ GraphExecutorPy::~GraphExecutorPy() {
   ConfigManager::GetInstance().ResetConfig();
 }
 
-void GraphExecutorPy::GetWeightInfo(
-  const CNodePtr &root_node, const AnfNodePtr &weight_node,
-  std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const {
-  MS_EXCEPTION_IF_NULL(root_node);
-  MS_EXCEPTION_IF_NULL(fake_quant_table);
-  std::string weight_name;
-  auto x = root_node->input(1);
-  MS_EXCEPTION_IF_NULL(x);
-  if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
-    weight_name = weight_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
-  } else {
-    auto para = weight_node->cast_ptr<Parameter>();
-    MS_EXCEPTION_IF_NULL(para);
-    weight_name = para->name();
-  }
-  // find the fakequant from input
-  int64_t count = 0;
-  const int64_t max_depth = 5;
-  auto is_quant_cnode = [](const AnfNodePtr &node) {
-    return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
-  };
-  while (!is_quant_cnode(x)) {
-    if (count >= max_depth) {
-      break;
-    }
-    auto cnode = x->cast_ptr<CNode>();
-    if (cnode == nullptr || cnode->size() <= 1) {
-      break;
-    }
-    x = cnode->input(1);
-    count += 1;
-  }
-  if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
-    (*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
-  }
-  // get the fakequant parameter minq's name
-  if (!is_quant_cnode(x)) {
-    return;
-  }
-  auto cnode = x->cast_ptr<CNode>();
-  constexpr size_t expect_input_size = 4;
-  if (cnode == nullptr || cnode->IsApply(prim::kPrimLoad) || cnode->size() != expect_input_size) {
-    return;
-  }
-  const size_t fakequant_index = 2;
-  auto fakequant_min_node = cnode->input(fakequant_index);
-  if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
-    return;
-  }
-  std::string fakequant_min_node_name;
-  if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
-    fakequant_min_node_name = fakequant_min_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
-  } else {
-    auto param = fakequant_min_node->cast_ptr<Parameter>();
-    MS_EXCEPTION_IF_NULL(param);
-    fakequant_min_node_name = param->name();
-  }
-  auto quant_op = GetValuePtr<PrimitivePy>(cnode->input(0));
-  if (quant_op == nullptr) {
-    return;
-  }
-  (*fake_quant_table)[weight_name] = std::make_pair(quant_op->adapter(), fakequant_min_node_name);
-}
-
-std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> GraphExecutorPy::FetchInfoForQuantExport(
-  const std::string &phase) {
-  FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
-  MS_EXCEPTION_IF_NULL(func_graph);
-  MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase << ")!";
-  std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> fake_quant_table;
-  auto filter = [](const AnfNodePtr &node) {
-    return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) ||
-             IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative));
-  };
-  std::vector<AnfNodePtr> nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter);
-  auto is_quant_cnode = [](const AnfNodePtr &node) {
-    return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
-  };
-  const size_t root_node_size = 3;
-  const size_t weight_index = 2;
-  for (const auto &node : nodes) {
-    auto root_node = node->cast<CNodePtr>();
-    if (root_node == nullptr || root_node->size() != root_node_size) {
-      continue;
-    }
-    auto weight = root_node->input(weight_index);
-    if (!is_quant_cnode(weight)) {
-      auto tuple_node = weight->cast_ptr<CNode>();
-      if (tuple_node != nullptr) {
-        auto fake_node = tuple_node->input(1);
-        if (!is_quant_cnode(fake_node)) {
-          continue;
-        } else {
-          weight = fake_node;
-        }
-      }
-    }
-    // get parameter weight's name
-    auto cnode = weight->cast_ptr<CNode>();
-    MS_EXCEPTION_IF_NULL(cnode);
-    auto weight_node = cnode->input(weight_index);
-    MS_EXCEPTION_IF_NULL(weight_node);
-    if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
-      continue;
-    }
-    GetWeightInfo(root_node, weight_node, &fake_quant_table);
-  }
-  return fake_quant_table;
-}
-
 void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) {
   // save the graph to GraphExecutorPy
   FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
@@ -130,9 +130,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
   void TerminateDebugger();
 #endif
 
-  std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> FetchInfoForQuantExport(
-    const std::string &phase);
-
   // Generate a key for mapping function graph
   py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false);
 

@@ -140,8 +137,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
 
  private:
   GraphExecutorPy() = default;
-  void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
-                     std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const;
   void ParallelPostProcess(const string &phase);
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
@@ -161,7 +161,6 @@
 #include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h"
 #include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h"
 #include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h"
-#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
 #include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h"
 #include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h"
 #include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h"

@@ -667,8 +666,6 @@ void AscendUnifyMindIR(const std::shared_ptr<session::KernelGraph> &kernel_graph
   unify_mindir_pm->AddPass(std::make_shared<opt::MomentumUnifyOutput>());
   unify_mindir_pm->AddPass(std::make_shared<opt::RMSPropUnifyOutput>());
   unify_mindir_pm->AddPass(std::make_shared<opt::CenteredRMSPropUnifyOutput>());
-  unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerLayerGradUnifyMindIR>());
-  unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerChannelGradUnifyMindIR>());
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
   if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
@@ -1,233 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
-
-#include <vector>
-#include <memory>
-
-#include "include/common/utils/utils.h"
-#include "utils/ms_context.h"
-#include "backend/common/optimizer/helper.h"
-#include "runtime/device/kernel_info.h"
-#include "backend/common/session/anf_runtime_algorithm.h"
-#include "include/common/utils/anfalgo.h"
-#include "utils/trace_base.h"
-
-namespace mindspore {
-namespace opt {
-void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerGradD(
-  const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
-  std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
-  const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
-  if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
-    MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
-                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
-                      << trace::DumpSourceLines(lsq_perlayer_grad_node);
-  }
-  std::vector<AnfNodePtr> lsq_perlayer_grad_d_inputs = {
-    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDOpName)),
-    lsq_perlayer_grad_inputs[kIndex1], lsq_perlayer_grad_inputs[kIndex2], lsq_perlayer_grad_inputs[kIndex3],
-    lsq_perlayer_grad_inputs[kIndex4]};
-  auto lsq_perlayer_grad_d = NewCNode(lsq_perlayer_grad_d_inputs, graph);
-  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d);
-  lsq_perlayer_grad_d->set_scope(lsq_perlayer_grad_node->scope());
-
-  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL),
-                common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL)};
-  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL),
-                 common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL)};
-  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_grad_d.get());
-
-  common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perlayer_grad_node, lsq_perlayer_grad_d);
-  CreateMultipleOutputsOfAnfNode(graph, lsq_perlayer_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
-                                 lsq_perlayer_grad_d_outputs);
-}
-
-void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerReduceGrad(
-  const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
-  const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
-  std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
-  MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs);
-  const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
-  if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
-    MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
-                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
-                      << trace::DumpSourceLines(lsq_perlayer_grad_node);
-  }
-  if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
-    MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_d_outputs has wrong inputs size, should be "
-                      << kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size()
-                      << trace::DumpSourceLines(lsq_perlayer_grad_node);
-  }
-  std::vector<AnfNodePtr> lsq_perlayer_reduce_grad_inputs = {
-    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDReduceOpName)),
-    lsq_perlayer_grad_d_outputs[kIndex1]};
-  auto lsq_perlayer_reduce_grad = NewCNode(lsq_perlayer_reduce_grad_inputs, graph);
-  MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad);
-  lsq_perlayer_reduce_grad->set_scope(lsq_perlayer_grad_node->scope());
-
-  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 1UL)};
-  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 1UL)};
-  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_reduce_grad.get());
-
-  (*lsq_perlayer_reduce_grad_outputs).push_back(lsq_perlayer_reduce_grad);
-}
-
-void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelGradD(
-  const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
-  std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
-  const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
-  if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
-    MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
-                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
-                      << trace::DumpSourceLines(lsq_perchannel_grad_node);
-  }
-  std::vector<AnfNodePtr> lsq_perchannel_grad_d_inputs = {
-    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDOpName)),
-    lsq_perchannel_grad_inputs[kIndex1], lsq_perchannel_grad_inputs[kIndex2], lsq_perchannel_grad_inputs[kIndex3],
-    lsq_perchannel_grad_inputs[kIndex4]};
-  auto lsq_perchannel_grad_d = NewCNode(lsq_perchannel_grad_d_inputs, graph);
-  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d);
-  lsq_perchannel_grad_d->set_scope(lsq_perchannel_grad_node->scope());
-
-  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL),
-                common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL)};
-  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL),
-                 common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL)};
-  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_grad_d.get());
-
-  common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
-  common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
-  CreateMultipleOutputsOfAnfNode(graph, lsq_perchannel_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
-                                 lsq_perchannel_grad_d_outputs);
-}
-
-void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelReduceGrad(
-  const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
-  const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
-  std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
-  MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs);
-  const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
-  if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
-    MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
-                      << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
-                      << trace::DumpSourceLines(lsq_perchannel_grad_node);
-  }
-  if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
-    MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_d_outputs has wrong inputs size, should be "
-                      << kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perchannel_grad_inputs.size()
-                      << trace::DumpSourceLines(lsq_perchannel_grad_node);
-  }
-  std::vector<AnfNodePtr> lsq_perchannel_reduce_grad_inputs = {
-    NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDReduceOpName)),
-    lsq_perchannel_grad_d_outputs[kIndex1]};
-  auto lsq_perchannel_reduce_grad = NewCNode(lsq_perchannel_reduce_grad_inputs, graph);
-  MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad);
-  lsq_perchannel_reduce_grad->set_scope(lsq_perchannel_grad_node->scope());
-
-  auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 1UL)};
-  auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 1UL)};
-  common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_reduce_grad.get());
-  common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_reduce_grad);
-  (*lsq_perchannel_reduce_grad_outputs).push_back(lsq_perchannel_reduce_grad);
-}
-
-const BaseRef FakeLearnedScaleQuantPerLayerGradUnifyMindIR::DefinePattern() const {
-  VarPtr Xs = std::make_shared<SeqVar>();
-  auto prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradOpName);
-  return VectorRef({prim, Xs});
-}
-
-const AnfNodePtr FakeLearnedScaleQuantPerLayerGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
-                                                                       const AnfNodePtr &node, const EquivPtr &) const {
-  MS_EXCEPTION_IF_NULL(node);
-  MS_EXCEPTION_IF_NULL(func_graph);
-  auto cnode = node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-  auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
-  MS_EXCEPTION_IF_NULL(primitive);
-
-  std::vector<AnfNodePtr> lsq_perlayer_grad_d_outputs;
-  CreateOutputsOfLSQPerLayerGradD(func_graph, cnode, &lsq_perlayer_grad_d_outputs);
-  if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradOutputNum) {
-    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_grad_d_outputs has wrong inputs size, should be "
-                      << kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size()
-                      << trace::DumpSourceLines(node);
-  }
-
-  std::vector<AnfNodePtr> lsq_perlayer_reduce_grad_outputs;
-  CreateOutputsOfLSQPerLayerReduceGrad(func_graph, cnode, lsq_perlayer_grad_d_outputs,
-                                       &lsq_perlayer_reduce_grad_outputs);
-  if (lsq_perlayer_reduce_grad_outputs.size() != kSingleOutputNum) {
-    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_reduce_grad_outputs has wrong inputs size, should be "
-                      << kSingleOutputNum << ", but got " << lsq_perlayer_reduce_grad_outputs.size()
-                      << trace::DumpSourceLines(node);
-  }
-
-  std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perlayer_grad_d_outputs[0],
-                                               lsq_perlayer_reduce_grad_outputs[0]};
-  auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
-  return make_tuple;
-}
-
-const BaseRef FakeLearnedScaleQuantPerChannelGradUnifyMindIR::DefinePattern() const {
-  VarPtr Xs = std::make_shared<SeqVar>();
-  auto prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradOpName);
-  return VectorRef({prim, Xs});
-}
-
-const AnfNodePtr FakeLearnedScaleQuantPerChannelGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
-                                                                         const AnfNodePtr &node,
-                                                                         const EquivPtr &) const {
-  MS_EXCEPTION_IF_NULL(node);
-  MS_EXCEPTION_IF_NULL(func_graph);
-  auto cnode = node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-  auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
-  MS_EXCEPTION_IF_NULL(primitive);
-
-  std::vector<AnfNodePtr> lsq_perchannel_grad_d_outputs;
-  CreateOutputsOfLSQPerChannelGradD(func_graph, cnode, &lsq_perchannel_grad_d_outputs);
-  if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradOutputNum) {
-    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_grad_d_outputs has wrong inputs size, should be "
-                      << kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perchannel_grad_d_outputs.size()
-                      << trace::DumpSourceLines(node);
-  }
-
-  std::vector<AnfNodePtr> lsq_perchannel_reduce_grad_outputs;
-  CreateOutputsOfLSQPerChannelReduceGrad(func_graph, cnode, lsq_perchannel_grad_d_outputs,
-                                         &lsq_perchannel_reduce_grad_outputs);
-  if (lsq_perchannel_reduce_grad_outputs.size() != kSingleOutputNum) {
-    MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_reduce_grad_outputs has wrong inputs size, should be "
-                      << kSingleOutputNum << ", but got " << lsq_perchannel_reduce_grad_outputs.size()
-                      << trace::DumpSourceLines(node);
-  }
-
-  std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perchannel_grad_d_outputs[0],
-                                               lsq_perchannel_reduce_grad_outputs[0]};
-  auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
-  return make_tuple;
-}
-} // namespace opt
-} // namespace mindspore
@@ -1,72 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
-#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
-
-#include <vector>
-#include "backend/common/optimizer/optimizer.h"
-#include "backend/common/optimizer/helper.h"
-
-namespace mindspore {
-namespace opt {
-constexpr size_t kFakeLearnedScaleQuantGradOutputNum = 2;
-constexpr size_t kFakeLearnedScaleQuantGradInputNum = 5;
-constexpr size_t kFakeLearnedScaleQuantGradDOutputNum = 2;
-constexpr auto kFakeLearnedScaleQuantPerLayerGradOpName = "FakeLearnedScaleQuantPerLayerGrad";
-constexpr auto kFakeLearnedScaleQuantPerLayerGradDOpName = "FakeLearnedScaleQuantPerLayerGradD";
-constexpr auto kFakeLearnedScaleQuantPerLayerGradDReduceOpName = "FakeLearnedScaleQuantPerLayerGradDReduce";
-constexpr auto kFakeLearnedScaleQuantPerChannelGradOpName = "FakeLearnedScaleQuantPerChannelGrad";
-constexpr auto kFakeLearnedScaleQuantPerChannelGradDOpName = "FakeLearnedScaleQuantPerChannelGradD";
-constexpr auto kFakeLearnedScaleQuantPerChannelGradDReduceOpName = "FakeLearnedScaleQuantPerChannelGradDReduce";
-
-constexpr auto kAttrNeg_trunc = "neg_trunc";
-constexpr auto kAttrChannelAxis = "channel_axis";
-
-class FakeLearnedScaleQuantPerLayerGradUnifyMindIR : public PatternProcessPass {
- public:
-  explicit FakeLearnedScaleQuantPerLayerGradUnifyMindIR(bool multigraph = true)
-      : PatternProcessPass("fake_learned_scale_quant_perlayer_grad_unify_mindir", multigraph) {}
-  ~FakeLearnedScaleQuantPerLayerGradUnifyMindIR() override = default;
-  const BaseRef DefinePattern() const override;
-  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
-
- private:
-  void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
-                                       std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const;
-  void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
-                                            const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
-                                            std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const;
-};
-
-class FakeLearnedScaleQuantPerChannelGradUnifyMindIR : public PatternProcessPass {
- public:
-  explicit FakeLearnedScaleQuantPerChannelGradUnifyMindIR(bool multigraph = true)
-      : PatternProcessPass("fake_learned_scale_quant_perchannel_grad_unify_mindir", multigraph) {}
-  ~FakeLearnedScaleQuantPerChannelGradUnifyMindIR() override = default;
-  const BaseRef DefinePattern() const override;
-  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
-
- private:
-  void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
-                                         std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const;
-  void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
-                                              const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
-                                              std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const;
-};
-
-} // namespace opt
-} // namespace mindspore
-#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
@@ -220,10 +220,6 @@ constexpr const char kNameXlogy[] = "Xlogy";
 constexpr const char kNameReLUV2[] = "ReLUV2";
 constexpr const char kNameAccumulateNV2[] = "AccumulateNV2";
 constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad";
-constexpr const char kNameFakeQuantWithMinMaxVars[] = "FakeQuantWithMinMaxVars";
-constexpr const char kNameFakeQuantWithMinMaxVarsGradient[] = "FakeQuantWithMinMaxVarsGradient";
-constexpr const char kNameFakeQuantWithMinMaxVarsPerChannel[] = "FakeQuantWithMinMaxVarsPerChannel";
-constexpr const char kNameFakeQuantWithMinMaxVarsPerChannelGradient[] = "FakeQuantWithMinMaxVarsPerChannelGradient";
 constexpr const char kNameActsULQ[] = "ActsULQ";
 constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad";
 constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad";
@@ -56,41 +56,6 @@ ATTR_MAP(ConfusionMulGrad) = {{"axes", ATTR_DESC(axes, AnyTraits<std::vector<int
 OUTPUT_MAP(ConfusionMulGrad) = {{0, OUTPUT_DESC(output0)}, {1, OUTPUT_DESC(output1)}};
 REG_ADPT_DESC(ConfusionMulGrad, kNameConfusionMulGrad, ADPT_DESC(ConfusionMulGrad))
 
-// FakeQuantWithMinMaxVars
-INPUT_MAP(FakeQuantWithMinMaxVars) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
-ATTR_MAP(FakeQuantWithMinMaxVars) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
-                                     {"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
-OUTPUT_MAP(FakeQuantWithMinMaxVars) = {{0, OUTPUT_DESC(y)}};
-REG_ADPT_DESC(FakeQuantWithMinMaxVars, kNameFakeQuantWithMinMaxVars, ADPT_DESC(FakeQuantWithMinMaxVars))
-
-// FakeQuantWithMinMaxVarsGradient
-INPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
-  {1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
-ATTR_MAP(FakeQuantWithMinMaxVarsGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
-                                             {"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
-OUTPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
-  {0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
-REG_ADPT_DESC(FakeQuantWithMinMaxVarsGradient, kNameFakeQuantWithMinMaxVarsGradient,
-              ADPT_DESC(FakeQuantWithMinMaxVarsGradient))
-
-// FakeQuantWithMinMaxVarsPerChannel
-INPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
-ATTR_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
-                                               {"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
-OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{0, OUTPUT_DESC(y)}};
-REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel, kNameFakeQuantWithMinMaxVarsPerChannel,
-              ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel))
-
-// FakeQuantWithMinMaxVarsPerChannelGradient
-INPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
-  {1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
-ATTR_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
-                                                       {"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
-OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
-  {0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
-REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient, kNameFakeQuantWithMinMaxVarsPerChannelGradient,
-              ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient))
-
 // GreaterEqual
 INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
 ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP;
@@ -32,18 +32,6 @@ DECLARE_OP_USE_OUTPUT(AccumulateNV2)
 DECLARE_OP_ADAPTER(ConfusionMulGrad)
 DECLARE_OP_USE_OUTPUT(ConfusionMulGrad)
-
-DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVars)
-DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVars)
-
-DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsGradient)
-DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsGradient)
-
-DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannel)
-DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannel)
-
-DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannelGradient)
-DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannelGradient)
 
 DECLARE_OP_ADAPTER(GreaterEqual)
 DECLARE_OP_USE_OUTPUT(GreaterEqual)
 
@@ -1509,12 +1509,6 @@ class _CellGraphExecutor:
         """
         self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key)
 
-    def fetch_info_for_quant_export(self, exec_id):
-        """Get graph proto from pipeline."""
-        if self._graph_executor.has_compiled(exec_id) is False:
-            return None
-        return self._graph_executor.fetch_info_for_quant_export(exec_id)
-
 
 def ms_memory_recycle():
     """
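A hedged sketch of how the removed executor helper was driven during quantization export (it mirrors `get_inputs_table` in the `quant_export.py` file deleted further below): compile the network under a dedicated phase, then query the fake-quant table for that graph. `network` and `inputs` are assumed placeholders, and the calls only exist on MindSpore versions that still ship this API:

from mindspore.common.api import _cell_graph_executor as _executor

# Assumption: `network` is a quantization-aware-trained Cell and `inputs` are its input tensors.
graph_id, _ = _executor.compile(network, *inputs, phase='export_quant', do_convert=False)
# Returned None if the phase was never compiled; otherwise a dict mapping each weight name
# to a (fake-quant primitive adapter, min-parameter name) pair.
quant_info_table = _executor.fetch_info_for_quant_export(graph_id)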
@@ -1,4 +0,0 @@
-approvers:
-- zhang_xue_tong
-- jpc_chenjianping
-- hangangqiang
@@ -1,19 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-MindSpore compression module.
-
-Note: This is an experimental interface that is subject to change and/or deletion.
-"""
@@ -1,24 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-Common module for various compression algorithms, now only including datatype definition for quantization.
-
-Note: This is an experimental interface that is subject to change and/or deletion.
-"""
-
-from __future__ import absolute_import
-from mindspore.compression.common.constant import QuantDtype
-
-__all__ = ["QuantDtype"]
@@ -1,124 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-Note:
-    Constant module for compression. This is interface that is subject to change or deletion.
-"""
-from __future__ import absolute_import
-
-import enum
-import re
-from types import DynamicClassAttribute
-
-
-__all__ = ["QuantDtype"]
-
-
-@enum.unique
-class QuantDtype(enum.Enum):
-    """
-    An enum for quant datatype, contains `INT2` ~ `INT8`, `UINT2` ~ `UINT8`.
-    """
-    INT2 = "INT2"
-    INT3 = "INT3"
-    INT4 = "INT4"
-    INT5 = "INT5"
-    INT6 = "INT6"
-    INT7 = "INT7"
-    INT8 = "INT8"
-
-    UINT2 = "UINT2"
-    UINT3 = "UINT3"
-    UINT4 = "UINT4"
-    UINT5 = "UINT5"
-    UINT6 = "UINT6"
-    UINT7 = "UINT7"
-    UINT8 = "UINT8"
-
-    def __str__(self):
-        return f"{self.name}"
-
-    @staticmethod
-    def is_signed(dtype):
-        """
-        Get whether the quant datatype is signed.
-
-        Args:
-            dtype (QuantDtype): quant datatype.
-
-        Returns:
-            bool, whether the input quant datatype is signed.
-
-        Examples:
-            >>> quant_dtype = QuantDtype.INT8
-            >>> is_signed = QuantDtype.is_signed(quant_dtype)
-        """
-        return dtype in [QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5,
-                         QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8]
-
-    @staticmethod
-    def switch_signed(dtype):
-        """
-        Switch the signed state of the input quant datatype.
-
-        Args:
-            dtype (QuantDtype): quant datatype.
-
-        Returns:
-            QuantDtype, quant datatype with opposite signed state as the input.
-
-        Examples:
-            >>> quant_dtype = QuantDtype.INT8
-            >>> quant_dtype = QuantDtype.switch_signed(quant_dtype)
-        """
-        type_map = {
-            QuantDtype.INT2: QuantDtype.UINT2,
-            QuantDtype.INT3: QuantDtype.UINT3,
-            QuantDtype.INT4: QuantDtype.UINT4,
-            QuantDtype.INT5: QuantDtype.UINT5,
-            QuantDtype.INT6: QuantDtype.UINT6,
-            QuantDtype.INT7: QuantDtype.UINT7,
-            QuantDtype.INT8: QuantDtype.UINT8,
-            QuantDtype.UINT2: QuantDtype.INT2,
-            QuantDtype.UINT3: QuantDtype.INT3,
-            QuantDtype.UINT4: QuantDtype.INT4,
-            QuantDtype.UINT5: QuantDtype.INT5,
-            QuantDtype.UINT6: QuantDtype.INT6,
-            QuantDtype.UINT7: QuantDtype.INT7,
-            QuantDtype.UINT8: QuantDtype.INT8
-        }
-        return type_map.get(dtype)
-
-    @DynamicClassAttribute
-    def _value(self):
-        """The value of the Enum member."""
-        return int(re.search(r"(\d+)", self._value_).group(1))
-
-    @DynamicClassAttribute
-    def num_bits(self):
-        """
-        Get the num bits of the QuantDtype member.
-
-        Returns:
-            int, the num bits of the QuantDtype member.
-
-        Examples:
-            >>> from mindspore.compression.common import QuantDtype
-            >>> quant_dtype = QuantDtype.INT8
-            >>> num_bits = quant_dtype.num_bits
-            >>> print(num_bits)
-            8
-        """
-        return self._value
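Taken together, the removed `QuantDtype` helpers composed as in the short sketch below; it assumes a MindSpore version that still ships `mindspore.compression`:

from mindspore.compression.common import QuantDtype

dtype = QuantDtype.INT4
print(QuantDtype.is_signed(dtype))      # True: INT4 is in the signed group
print(dtype.num_bits)                   # 4, parsed out of the enum value string "INT4"
print(QuantDtype.switch_signed(dtype))  # QuantDtype.UINT4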
@@ -1,19 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""
-Compression export module.
-
-Note: This is an experimental interface that is subject to change and/or deletion.
-"""
@ -1,515 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Note:
|
|
||||||
Export for quantization. This is interface that is subject to change or deletion.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
import copy
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from mindspore import log as logger
|
|
||||||
from mindspore import nn, ops
|
|
||||||
from mindspore._checkparam import Validator
|
|
||||||
from mindspore.common import Tensor
|
|
||||||
from mindspore.common import dtype as mstype
|
|
||||||
from mindspore.common.api import _cell_graph_executor as _executor
|
|
||||||
from mindspore.common.parameter import Parameter
|
|
||||||
from mindspore.nn import Cell
|
|
||||||
from mindspore.nn.layer import quant
|
|
||||||
from mindspore.ops import operations as P
|
|
||||||
from mindspore.ops import functional as F
|
|
||||||
from mindspore.ops.operations import _inner_ops as inner
|
|
||||||
from mindspore.compression.quant import quant_utils
|
|
||||||
from mindspore.compression.quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell
|
|
||||||
|
|
||||||
__all__ = ["ExportToQuantInferNetwork"]
|
|
||||||
|
|
||||||
|
|
||||||
class QuantBlock(Cell):
|
|
||||||
r"""
|
|
||||||
A quant block of Conv/Dense, activation layer for Ascend deploy.
|
|
||||||
|
|
||||||
Calculate Conv or Dense in Int8, with Quant and DeQuant.
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
This block is only for deploy, and not trainable.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
in_channels (int): The number of channels in the input space.
|
|
||||||
out_channels (int): The number of channels in the output space.
|
|
||||||
weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
|
|
||||||
is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
|
|
||||||
bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
|
|
||||||
same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
|
|
||||||
has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
|
|
||||||
activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
|
|
||||||
batchnorm (bool): Specifies to used batchnorm or not. Default: None.
|
|
||||||
activation (string): Specifies activation type. The optional values are as following:
|
|
||||||
'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
|
|
||||||
'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
|
|
||||||
|
|
||||||
Inputs:
|
|
||||||
- **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
|
|
||||||
|
|
||||||
Outputs:
|
|
||||||
Tensor of shape :math:`(N, out\_channels)`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
core_op,
|
|
||||||
weight,
|
|
||||||
quant_op,
|
|
||||||
dequant_op,
|
|
||||||
dequant_scale,
|
|
||||||
bias=None,
|
|
||||||
activation=None):
|
|
||||||
super(QuantBlock, self).__init__()
|
|
||||||
self.core_op = core_op
|
|
||||||
self.weight = weight
|
|
||||||
self.quant = quant_op
|
|
||||||
self.dequant = dequant_op
|
|
||||||
self.dequant_scale = dequant_scale
|
|
||||||
self.bias = bias
|
|
||||||
self.has_bias = bias is not None
|
|
||||||
self.activation = activation
|
|
||||||
self.has_act = activation is not None
|
|
||||||
self.bias_add = P.BiasAdd()
|
|
||||||
self.sub = P.Sub()
|
|
||||||
self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')
|
|
||||||
|
|
||||||
def construct(self, x):
|
|
||||||
x = self.quant(x)
|
|
||||||
if self.has_bias:
|
|
||||||
weight = self.sub(self.weight, self.weight_offset)
|
|
||||||
x = self.core_op(x, weight)
|
|
||||||
x = self.bias_add(x, self.bias)
|
|
||||||
else:
|
|
||||||
x = self.core_op(x, self.weight)
|
|
||||||
x = self.dequant(x, self.dequant_scale)
|
|
||||||
x = F.cast(x, mstype.float32)
|
|
||||||
if self.has_act:
|
|
||||||
x = self.activation(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def extend_repr(self):
|
|
||||||
s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
|
|
||||||
if self.has_bias:
|
|
||||||
s += f', bias=shape[{self.bias.shape}]'
|
|
||||||
if self.has_act:
|
|
||||||
s += f', activation={self.activation}'
|
|
||||||
s += f', dequant={self.dequant}'
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
class QuantMindirBlock(Cell):
|
|
||||||
"""A quant binary block of Conv/Dense, activation layer for export MINDIR model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
core_op (Cell): The operation cell.
|
|
||||||
weight (Tensor): The weight of the cell.
|
|
||||||
bias (Tensor): The bias of the cell. Default: None.
|
|
||||||
activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
|
|
||||||
param_dict (dict): The information of the cell.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
core_op,
|
|
||||||
weight,
|
|
||||||
bias=None,
|
|
||||||
activation=None,
|
|
||||||
param_dict=None):
|
|
||||||
|
|
||||||
super(QuantMindirBlock, self).__init__()
|
|
||||||
self.core_op = core_op
|
|
||||||
if activation is not None:
|
|
||||||
self.core_op.add_prim_attr("activation_name", activation.__class__.__name__)
|
|
||||||
self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
|
|
||||||
self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"]))
|
|
||||||
if param_dict["output_maxq"] is not None:
|
|
||||||
self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"]))
|
|
||||||
self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"]))
|
|
||||||
self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"]))
|
|
||||||
if hasattr(core_op, 'pad_mode'):
|
|
||||||
self.core_op.add_prim_attr("pad_mode", core_op.pad_mode)
|
|
||||||
self.core_op.add_prim_attr("act_num_bits", Tensor(8))
|
|
||||||
self.core_op.add_prim_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"]))
|
|
||||||
self.core_op.add_prim_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"]))
|
|
||||||
if param_dict["input_narrow_range"] is not None:
|
|
||||||
self.core_op.add_prim_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"]))
|
|
||||||
if param_dict["output_narrow_range"] is not None:
|
|
||||||
self.core_op.add_prim_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"]))
|
|
||||||
if param_dict["input_maxq"] == 'None':
|
|
||||||
self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"]))
|
|
||||||
self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"]))
|
|
||||||
elif param_dict["input_maxq"] is not None:
|
|
||||||
self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"]))
|
|
||||||
self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"]))
|
|
||||||
|
|
||||||
self.weight = weight
|
|
||||||
self.bias = bias
|
|
||||||
self.has_bias = bias is not None
|
|
||||||
self.activation = activation
|
|
||||||
self.has_act = activation is not None
|
|
||||||
self.bias_add = P.BiasAdd()
|
|
||||||
|
|
||||||
def construct(self, x):
|
|
||||||
if self.has_bias:
|
|
||||||
x = self.core_op(x, self.weight)
|
|
||||||
x = self.bias_add(x, self.bias)
|
|
||||||
else:
|
|
||||||
x = self.core_op(x, self.weight)
|
|
||||||
if self.has_act:
|
|
||||||
x = self.activation(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def extend_repr(self):
|
|
||||||
s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
|
|
||||||
if self.has_bias:
|
|
||||||
s += f', bias=shape[{self.bias.shape}]'
|
|
||||||
if self.has_act:
|
|
||||||
s += f', activation={self.activation}'
|
|
||||||
return s
|
|
||||||
|
|
||||||
|
|
||||||
class ExportToQuantInferNetwork:
|
|
||||||
"""
|
|
||||||
Convert quantization aware network to infer network.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
network (Cell): MindSpore quantization aware training network.
|
|
||||||
inputs (Tensor): Input tensors of the `quantization aware training network`.
|
|
||||||
mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of network.
|
|
||||||
Default: 127.5.
|
|
||||||
std_dev (int, float): The standard deviation of input data after preprocessing, used for quantizing the first layer
|
|
||||||
of network. Default: 127.5.
|
|
||||||
is_mindir (bool): Whether to export in MINDIR format. Default: False.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Cell, the converted infer network.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
|
|
||||||
network = Validator.check_isinstance('network', network, (nn.Cell,))
|
|
||||||
self.data_type = mstype.int8
|
|
||||||
self.network = copy.deepcopy(network)
|
|
||||||
self.network_bk = copy.deepcopy(network)
|
|
||||||
self.get_inputs_table(inputs)
|
|
||||||
self.mean = mean
|
|
||||||
self.std_dev = std_dev
|
|
||||||
self.is_mindir = is_mindir
|
|
||||||
self.upcell = None
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def __get_dequant_scale(scale_a_in, scale_w):
|
|
||||||
"""Get dequant scale"""
|
|
||||||
scale_deq = scale_a_in * scale_w
|
|
||||||
|
|
||||||
# fuse parameter
|
|
||||||
# |--------|47:40|--------|39:32|--------|31:0|
|
|
||||||
# offset_w [8] shift_N [8] deq_scale [32]
|
|
||||||
float32_deq_scale = scale_deq.astype(np.float32)
|
|
||||||
uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
|
|
||||||
scale_length = scale_deq.size # channel
|
|
||||||
dequant_param = np.zeros(scale_length, dtype=np.uint64)
|
|
||||||
for index in range(scale_length):
|
|
||||||
dequant_param[index] += uint32_deq_scale[index]
|
|
||||||
scale_deq = Tensor(dequant_param, mstype.uint64)
|
|
||||||
return scale_deq
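# Illustrative example of the fused layout above (values assumed): a dequant scale of
# 0.125 has the float32 bit pattern 0x3E000000, so the resulting uint64 word is
# 0x000000003E000000; offset_w (bits [47:40]) and shift_N (bits [39:32]) remain zero here.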
|
|
||||||
|
|
||||||
def get_inputs_table(self, inputs):
|
|
||||||
"""Get the input quantization parameters of quantization cell for quant export."""
|
|
||||||
phase_name = 'export_quant'
|
|
||||||
graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
|
|
||||||
self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
"""Start to convert."""
|
|
||||||
logger.warning("The compression module is deprecated and may not be supported in later version, please use "
|
|
||||||
"MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
|
|
||||||
self.network.update_cell_prefix()
|
|
||||||
network = self.network
|
|
||||||
if isinstance(network, _AddFakeQuantInput):
|
|
||||||
network = network.network
|
|
||||||
network = self._convert_quant2deploy(network)
|
|
||||||
return network
|
|
||||||
|
|
||||||
def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
|
|
||||||
"""convert network's quant subcell to deploy subcell"""
|
|
||||||
scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)
|
|
||||||
|
|
||||||
# Build the `Quant` `Dequant` op.
|
|
||||||
# Quant only supports the per-layer version. Need to check here.
|
|
||||||
if float(scale_a_in) == 0:
|
|
||||||
raise ValueError("If `scale_a_in` is zero, will lead to zero error.")
|
|
||||||
quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
|
|
||||||
scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
|
|
||||||
dequant_op = inner.Dequant()
|
|
||||||
|
|
||||||
if isinstance(activation, _AddFakeQuantAfterSubCell):
|
|
||||||
activation = activation.subcell
|
|
||||||
elif hasattr(activation, "get_origin"):
|
|
||||||
activation = activation.get_origin()
|
|
||||||
|
|
||||||
# get op
|
|
||||||
if isinstance(cell_core, quant.DenseQuant):
|
|
||||||
op_core = P.MatMul()
|
|
||||||
else:
|
|
||||||
op_core = cell_core.conv
|
|
||||||
|
|
||||||
# get the `weight` and `bias`
|
|
||||||
weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)
|
|
||||||
|
|
||||||
if self.is_mindir:
|
|
||||||
block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
|
|
||||||
else:
|
|
||||||
block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
|
|
||||||
return block
|
|
||||||
|
|
||||||
def _get_input_quant_param(self, minq_name, np_type, param_dict):
|
|
||||||
"""get input quant parameter for quant block"""
|
|
||||||
fake_quant_a_in_prefix = minq_name[:-5]
|
|
||||||
cells = self.network_bk.cells_and_names()
|
|
||||||
for cell in cells:
|
|
||||||
if cell[0].endswith(fake_quant_a_in_prefix):
|
|
||||||
fake_quant_a_in = cell[1]
|
|
||||||
break
|
|
||||||
scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
|
|
||||||
quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
|
|
||||||
param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
|
|
||||||
return scale_a_in, zp_a_in
|
|
||||||
|
|
||||||
def __get_quant_param(self, cell_core, fake_quant_a_out):
|
|
||||||
"""get parameter for quant block"""
|
|
||||||
w_minq_name = cell_core.fake_quant_weight.minq.name
|
|
||||||
w_maxq_name = cell_core.fake_quant_weight.maxq.name
|
|
||||||
np_type = mstype.dtype_to_nptype(self.data_type)
|
|
||||||
param_dict = dict()
|
|
||||||
param_dict["filter_maxq"] = None
|
|
||||||
param_dict["filter_minq"] = None
|
|
||||||
param_dict["output_maxq"] = None
|
|
||||||
param_dict["output_minq"] = None
|
|
||||||
param_dict["input_maxq"] = None
|
|
||||||
param_dict["input_minq"] = None
|
|
||||||
param_dict["input_narrow_range"] = None
|
|
||||||
param_dict["output_narrow_range"] = None
|
|
||||||
param_dict["weight_narrow_range"] = cell_core.fake_quant_weight.narrow_range
|
|
||||||
param_dict["mean"] = self.mean
|
|
||||||
param_dict["std_dev"] = self.std_dev
|
|
||||||
param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric
|
|
||||||
param_dict["weight_num_bits"] = cell_core.fake_quant_weight.num_bits
|
|
||||||
|
|
||||||
scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
|
|
||||||
quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
|
|
||||||
if fake_quant_a_out is not None:
|
|
||||||
_, _, param_dict["output_maxq"], param_dict["output_minq"] = \
|
|
||||||
quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
|
|
||||||
param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range
|
|
||||||
|
|
||||||
info = self.quant_info_table.get(w_minq_name, None)
|
|
||||||
if not info:
|
|
||||||
info = self.quant_info_table.get(w_maxq_name, None)
|
|
||||||
if info:
|
|
||||||
_, minq_name = info
|
|
||||||
if minq_name == 'input':
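# When the weight quantizer is paired directly with the network input, the first
# layer's input quant params come from the preprocessing statistics:
# scale = 1 / std_dev and zero point = round(mean). The 'None' sentinels make
# QuantMindirBlock export mean/std_dev attributes instead of input min/max.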
|
|
||||||
scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
|
|
||||||
(1 / self.std_dev), round(self.mean), 'None', 'None'
|
|
||||||
else:
|
|
||||||
scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
|
|
||||||
else:
|
|
||||||
# skip quant layer
|
|
||||||
scale_a_in, zp_a_in = 1.0, 0.0
|
|
||||||
return scale_a_in, zp_a_in, scale_w, zp_w, param_dict
|
|
||||||
|
|
||||||
def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
|
|
||||||
"""Get weight and bias for quantizaiton"""
|
|
||||||
np_type = mstype.dtype_to_nptype(self.data_type)
|
|
||||||
weight = cell_core.weight.data.asnumpy()
|
|
||||||
bias = None
|
|
||||||
if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
|
|
||||||
if cell_core.has_bias:
|
|
||||||
bias = cell_core.bias.data.asnumpy()
|
|
||||||
elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
|
|
||||||
weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
|
|
||||||
elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
|
|
||||||
weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
|
|
||||||
weight_b = weight
|
|
||||||
bias_b = bias
|
|
||||||
# apply the quant
|
|
||||||
quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
|
|
||||||
cell_core.fake_quant_weight.num_bits,
|
|
||||||
cell_core.fake_quant_weight.narrow_range)
|
|
||||||
weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
|
|
||||||
if bias is not None:
|
|
||||||
if 0 in scale_a_in:
|
|
||||||
raise ValueError("Zero exist in `scale_a_in` which will lead to divide zero error.")
|
|
||||||
if 0 in scale_w:
|
|
||||||
raise ValueError("Zero exist in `scale_w` which will lead to divide zero error.")
|
|
||||||
bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)
|
|
||||||
|
|
||||||
if isinstance(cell_core, quant.DenseQuant):
|
|
||||||
weight = np.transpose(weight)
|
|
||||||
weight_b = np.transpose(weight_b)
|
|
||||||
|
|
||||||
weight_tensor = Tensor(weight, self.data_type)
|
|
||||||
weight_b_tensor = Tensor(weight_b)
|
|
||||||
if bias_b is not None:
|
|
||||||
bias_b_tensor = Tensor(bias_b, mstype.float32)
|
|
||||||
return weight_tensor, bias, weight_b_tensor, bias_b_tensor
|
|
||||||
return weight_tensor, bias, weight_b_tensor, None
|
|
||||||
|
|
||||||
def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
|
|
||||||
"""add output quant info for quant op for export mindir."""
|
|
||||||
if self.is_mindir:
|
|
||||||
if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'):
|
|
||||||
np_type = mstype.dtype_to_nptype(self.data_type)
|
|
||||||
_, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
|
|
||||||
origin_op.add_prim_attr('output_maxq', Tensor(maxq))
|
|
||||||
origin_op.add_prim_attr('output_minq', Tensor(minq))
|
|
||||||
|
|
||||||
def _convert_subcell(self, network, change, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell."""
|
|
||||||
if subcell is not None and hasattr(subcell, "fake_quant_weight"):
|
|
||||||
new_subcell = self._get_quant_block(subcell, None, None)
|
|
||||||
prefix = subcell.param_prefix
|
|
||||||
new_subcell.update_parameters_name(prefix + '.')
|
|
||||||
self.upcell = new_subcell
|
|
||||||
network.insert_child_to_cell(name, new_subcell)
|
|
||||||
change = True
|
|
||||||
return network, change
|
|
||||||
|
|
||||||
def _convert_conv(self, network, change, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell for conv."""
|
|
||||||
cell_core = subcell.conv
|
|
||||||
activation = subcell.activation
|
|
||||||
fake_quant_act = None
|
|
||||||
if hasattr(activation, 'fake_quant_act_before'):
|
|
||||||
fake_quant_act = activation.fake_quant_act_before
|
|
||||||
elif hasattr(activation, 'fake_quant_act'):
|
|
||||||
fake_quant_act = activation.fake_quant_act
|
|
||||||
if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
|
|
||||||
new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
|
|
||||||
self.upcell = None
|
|
||||||
prefix = subcell.param_prefix
|
|
||||||
new_subcell.update_parameters_name(prefix + '.')
|
|
||||||
network.insert_child_to_cell(name, new_subcell)
|
|
||||||
change = True
|
|
||||||
return network, change
|
|
||||||
|
|
||||||
def _convert_dense(self, network, change, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell for dense."""
|
|
||||||
cell_core = subcell.dense
|
|
||||||
activation = subcell.activation
|
|
||||||
fake_quant_act = None
|
|
||||||
if hasattr(activation, 'fake_quant_act_before'):
|
|
||||||
fake_quant_act = activation.fake_quant_act_before
|
|
||||||
elif hasattr(activation, 'fake_quant_act'):
|
|
||||||
fake_quant_act = activation.fake_quant_act
|
|
||||||
if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
|
|
||||||
new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
|
|
||||||
prefix = subcell.param_prefix
|
|
||||||
new_subcell.update_parameters_name(prefix + '.')
|
|
||||||
network.insert_child_to_cell(name, new_subcell)
|
|
||||||
self.upcell = None
|
|
||||||
change = True
|
|
||||||
return network, change
|
|
||||||
|
|
||||||
def _convert_act(self, subcell):
|
|
||||||
"""Convert subcell to ant subcell for activation."""
|
|
||||||
activation = subcell.get_origin()
|
|
||||||
if isinstance(activation, nn.ReLU):
|
|
||||||
self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
|
|
||||||
elif isinstance(activation, nn.ReLU6):
|
|
||||||
self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
|
|
||||||
if self.upcell:
|
|
||||||
self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
|
|
||||||
return activation
|
|
||||||
|
|
||||||
def _convert_add(self, subcell):
|
|
||||||
"""Convert subcell to ant subcell for add."""
|
|
||||||
if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
|
|
||||||
add_op = subcell.add.subcell
|
|
||||||
subcell.__delattr__("add")
|
|
||||||
subcell.__setattr__("add", add_op)
|
|
||||||
add_op = subcell.add
|
|
||||||
self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
|
|
||||||
subcell.__delattr__("fake_quant_act")
|
|
||||||
subcell.__setattr__("fake_quant_act", P.identity())
|
|
||||||
|
|
||||||
def _convert_observer(self, network, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell for FakeQuantWithMinMaxObserver."""
|
|
||||||
if self.upcell:
|
|
||||||
self._add_output_min_max_for_op(self.upcell.core_op, subcell)
|
|
||||||
network.__delattr__(name)
|
|
||||||
network.__setattr__(name, P.identity())
|
|
||||||
|
|
||||||
def _convert_fake_quant_after_cell(self, network, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell for _AddFakeQuantAfterSubCell."""
|
|
||||||
op = subcell.subcell
|
|
||||||
self._add_output_min_max_for_op(op, subcell.fake_quant_act)
|
|
||||||
network.__delattr__(name)
|
|
||||||
network.__setattr__(name, op)
|
|
||||||
|
|
||||||
def _convert_core_quant_subcell(self, network, change, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell for conv and dense."""
|
|
||||||
is_core_subcell = True
|
|
||||||
if isinstance(subcell, nn.Conv2dBnAct):
|
|
||||||
network, change = self._convert_conv(network, change, name, subcell)
|
|
||||||
elif isinstance(subcell, nn.DenseBnAct):
|
|
||||||
network, change = self._convert_dense(network, change, name, subcell)
|
|
||||||
elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
|
|
||||||
quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
|
|
||||||
network, change = self._convert_subcell(network, change, name, subcell)
|
|
||||||
else:
|
|
||||||
is_core_subcell = False
|
|
||||||
return is_core_subcell, network, change
|
|
||||||
|
|
||||||
def _convert_other_quant_subcell(self, network, change, name, subcell):
|
|
||||||
"""Convert subcell to ant subcell for cell except conv and dense."""
|
|
||||||
is_other_subcell = True
|
|
||||||
if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
|
|
||||||
activation = self._convert_act(subcell)
|
|
||||||
network.insert_child_to_cell(name, activation)
|
|
||||||
change = True
|
|
||||||
elif isinstance(subcell, nn.TensorAddQuant):
|
|
||||||
self._convert_add(subcell)
|
|
||||||
elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
|
|
||||||
self._convert_observer(network, name, subcell)
|
|
||||||
elif isinstance(subcell, _AddFakeQuantAfterSubCell):
|
|
||||||
self._convert_fake_quant_after_cell(network, name, subcell)
|
|
||||||
change = True
|
|
||||||
else:
|
|
||||||
is_other_subcell = False
|
|
||||||
return is_other_subcell, network, change
|
|
||||||
|
|
||||||
def _convert_quant2deploy(self, network):
|
|
||||||
"""Convert network's all quant subcell to deploy subcell."""
|
|
||||||
cells = network.name_cells()
|
|
||||||
change = False
|
|
||||||
for name in cells:
|
|
||||||
subcell = cells[name]
|
|
||||||
if subcell == network:
|
|
||||||
continue
|
|
||||||
is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
|
|
||||||
is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
|
|
||||||
if not is_core_quant_subcell and not is_other_quant_subcell:
|
|
||||||
self.upcell = None
|
|
||||||
self._convert_quant2deploy(subcell)
|
|
||||||
if isinstance(network, nn.SequentialCell) and change:
|
|
||||||
network.cell_list = list(network.cells())
|
|
||||||
return network
|
|
|
@@ -1,28 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Quantization module, including base class of the quantizer, the quantization aware training algorithm,
|
|
||||||
and quantization utils.
|
|
||||||
|
|
||||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from .quantizer import OptimizeOption
|
|
||||||
from .qat import QuantizationAwareTraining, create_quant_config
|
|
||||||
from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers
|
|
||||||
|
|
||||||
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers", "QuantizationAwareTraining",
|
|
||||||
"create_quant_config", "OptimizeOption"]
|
|
|
@@ -1,634 +0,0 @@
|
||||||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Quantization aware training
|
|
||||||
|
|
||||||
Users can train a model with quantization awareness. MindSpore supports quantization aware training,
|
|
||||||
which models quantization errors in both the forward and backward passes using fake-quantization
|
|
||||||
operations. Note that the entire computation is carried out in floating point. At the end of quantization
|
|
||||||
aware training, MindSpore provides conversion functions to convert the trained model into lower precision.
|
|
||||||
|
|
||||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import re
|
|
||||||
import numpy as np
|
|
||||||
import mindspore.context as context
|
|
||||||
from mindspore import log as logger
|
|
||||||
from mindspore import nn, ops
|
|
||||||
from mindspore._checkparam import Validator, Rel
|
|
||||||
from mindspore.nn.layer import quant
|
|
||||||
from mindspore.ops import functional as F
|
|
||||||
from ..common import QuantDtype
|
|
||||||
from .quantizer import Quantizer, OptimizeOption
|
|
||||||
from .quant_utils import compute_kl_threshold
|
|
||||||
|
|
||||||
__all__ = ["QuantizationAwareTraining", "create_quant_config"]
|
|
||||||
|
|
||||||
|
|
||||||
def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver),
|
|
||||||
quant_delay=(0, 0),
|
|
||||||
quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
|
|
||||||
per_channel=(False, False),
|
|
||||||
symmetric=(False, False),
|
|
||||||
narrow_range=(False, False),
|
|
||||||
mode="DEFAULT"):
|
|
||||||
r"""
|
|
||||||
Config the observer type of weights and data flow with quant parameters.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element
|
|
||||||
applies to weights and the second applies to data flow. Currently, only
|
|
||||||
:class:`FakeQuantWithMinMaxObserver` is supported.
|
|
||||||
Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
|
|
||||||
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
|
|
||||||
during train and eval. The first element represents weights and the second element represents data flow.
|
|
||||||
Default: (0, 0).
|
|
||||||
quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
|
|
||||||
element represents weights and the second element represents data flow.
|
|
||||||
Default: (QuantDtype.INT8, QuantDtype.INT8).
|
|
||||||
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
|
|
||||||
then base on per channel, otherwise base on per layer. The first element represents weights
|
|
||||||
and the second element represents data flow, and the second element must be `False` now.
|
|
||||||
Default: (False, False).
|
|
||||||
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
|
|
||||||
base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
|
|
||||||
element represents data flow. Default: (False, False).
|
|
||||||
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
|
|
||||||
The first element represents weights and the second element represents data flow.
|
|
||||||
Default: (False, False).
|
|
||||||
mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported.
|
|
||||||
Default: "DEFAULT".
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
QuantConfig, contains the observer type of weight and activation.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If the second element of `per_channel` is not `False`.
|
|
||||||
"""
|
|
||||||
if per_channel[-1]:
|
|
||||||
raise ValueError("Arg 'per_channel' second element must be 'False'.")
|
|
||||||
weight_observer = quant_observer[0].partial_init(quant_delay=quant_delay[0], quant_dtype=quant_dtype[0],
|
|
||||||
per_channel=per_channel[0], symmetric=symmetric[0],
|
|
||||||
narrow_range=narrow_range[0], mode=mode)
|
|
||||||
act_observer = quant_observer[-1].partial_init(quant_delay=quant_delay[-1], quant_dtype=quant_dtype[-1],
|
|
||||||
per_channel=per_channel[-1], symmetric=symmetric[-1],
|
|
||||||
narrow_range=narrow_range[-1], mode=mode)
|
|
||||||
return quant.QuantConfig(weight=weight_observer, activation=act_observer)
|
|
||||||
|
|
||||||
|
|
||||||
class _AddFakeQuantInput(nn.Cell):
|
|
||||||
"""
|
|
||||||
Add a FakeQuant OP at the input of the network. Only the single-input case is supported.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, network, quant_delay=0):
|
|
||||||
super(_AddFakeQuantInput, self).__init__(auto_prefix=False)
|
|
||||||
self.fake_quant_input = quant.FakeQuantWithMinMaxObserver(min_init=-6, max_init=6,
|
|
||||||
quant_delay=quant_delay, ema=True)
|
|
||||||
self.fake_quant_input.update_parameters_name('fake_quant_input.')
|
|
||||||
self.network = network
|
|
||||||
|
|
||||||
def construct(self, data):
|
|
||||||
data = self.fake_quant_input(data)
|
|
||||||
output = self.network(data)
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
class _AddFakeQuantAfterSubCell(nn.Cell):
|
|
||||||
"""
|
|
||||||
Add a FakeQuant OP after the sub Cell.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, subcell, **kwargs):
|
|
||||||
super(_AddFakeQuantAfterSubCell, self).__init__(auto_prefix=False)
|
|
||||||
self.subcell = subcell
|
|
||||||
self.mode = "DEFAULT"
|
|
||||||
self.max_init = 6
|
|
||||||
self.min_init = -6
|
|
||||||
|
|
||||||
if kwargs.get("optimize_option") is not None and OptimizeOption.LEARNED_SCALE in kwargs["optimize_option"]:
|
|
||||||
self.mode = "LEARNED_SCALE"
|
|
||||||
self.max_init = 16
|
|
||||||
self.min_init = -16
|
|
||||||
|
|
||||||
self.fake_quant_act = quant.FakeQuantWithMinMaxObserver(min_init=self.min_init,
|
|
||||||
max_init=self.max_init,
|
|
||||||
ema=True,
|
|
||||||
quant_dtype=kwargs.get("quant_dtype"),
|
|
||||||
quant_delay=kwargs.get("quant_delay"),
|
|
||||||
per_channel=kwargs.get("per_channel"),
|
|
||||||
symmetric=kwargs.get("symmetric"),
|
|
||||||
narrow_range=kwargs.get("narrow_range"),
|
|
||||||
mode=self.mode)
|
|
||||||
|
|
||||||
def construct(self, *data):
|
|
||||||
output = self.subcell(*data)
|
|
||||||
output = self.fake_quant_act(output)
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
class QuantizationAwareTraining(Quantizer):
|
|
||||||
r"""
|
|
||||||
Quantizer for quantization aware training.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True.
|
|
||||||
freeze_bn (int): Number of steps after which the BatchNorm OP parameters are fixed to the global mean and variance.
|
|
||||||
Default: 1e7.
|
|
||||||
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
|
|
||||||
during train and eval. The first element represents weights and the second element represents data flow.
|
|
||||||
Default: (0, 0).
|
|
||||||
quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
|
|
||||||
element represents weights and the second element represents data flow. It is necessary to consider the
|
|
||||||
precision support of hardware devices in the practical quantization infer scenario.
|
|
||||||
Default: (QuantDtype.INT8, QuantDtype.INT8).
|
|
||||||
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
|
|
||||||
then base on per channel, otherwise base on per layer. The first element represents weights and the
|
|
||||||
second element represents data flow, and the second element must be `False` now. Default: (False, False).
|
|
||||||
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
|
|
||||||
base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
|
|
||||||
element represents data flow. Default: (False, False).
|
|
||||||
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
|
|
||||||
The first element represents weights and the second element represents data flow.
|
|
||||||
Default: (False, False).
|
|
||||||
optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently
|
|
||||||
only `QAT` and `LEARNED_SCALE` are supported (note that if both `QAT` and `LEARNED_SCALE` are configured,
|
|
||||||
`LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only works under some constraints, which
|
|
||||||
include: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True. More specifically, for operators
|
|
||||||
such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this
|
|
||||||
scenario, and narrow_range will automatically be set to False). Default: OptimizeOption.QAT.
|
|
||||||
one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True.
|
|
||||||
|
|
||||||
Supported Platforms:
|
|
||||||
``Ascend`` ``GPU``
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
TypeError: If the element of `quant_delay` or `freeze_bn` is not int.
|
|
||||||
TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range`
|
|
||||||
is not bool.
|
|
||||||
TypeError: If the element of `quant_dtype` is not `QuantDtype`.
|
|
||||||
ValueError: If the length of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is
|
|
||||||
greater than 2.
|
|
||||||
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0.
|
|
||||||
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True).
|
|
||||||
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True).
|
|
||||||
ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0).
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> from mindspore.compression.quant import QuantizationAwareTraining
|
|
||||||
>>> from mindspore import nn
|
|
||||||
>>> class LeNet5(nn.Cell):
|
|
||||||
... def __init__(self, num_class=10, channel=1):
|
|
||||||
... super(LeNet5, self).__init__()
|
|
||||||
... self.type = "fusion"
|
|
||||||
... self.num_class = num_class
|
|
||||||
...
|
|
||||||
... # change `nn.Conv2d` to `nn.Conv2dBnAct`
|
|
||||||
... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
|
|
||||||
... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
|
|
||||||
... # change `nn.Dense` to `nn.DenseBnAct`
|
|
||||||
... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
|
|
||||||
... self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
|
|
||||||
... self.fc3 = nn.DenseBnAct(84, self.num_class)
|
|
||||||
...
|
|
||||||
... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
|
|
||||||
... self.flatten = nn.Flatten()
|
|
||||||
...
|
|
||||||
... def construct(self, x):
|
|
||||||
... x = self.conv1(x)
|
|
||||||
... x = self.max_pool2d(x)
|
|
||||||
... x = self.conv2(x)
|
|
||||||
... x = self.max_pool2d(x)
|
|
||||||
... x = self.flatten(x)
|
|
||||||
... x = self.fc1(x)
|
|
||||||
... x = self.fc2(x)
|
|
||||||
... x = self.fc3(x)
|
|
||||||
... return x
|
|
||||||
...
|
|
||||||
>>> net = LeNet5()
|
|
||||||
>>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
|
|
||||||
>>> net_qat = quantizer.quantize(net)
|
|
||||||
"""
|
|
||||||
__quant_op_name = ["Add", "Sub", "Mul", "RealDiv", "ReduceMean"]
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
bn_fold=True,
|
|
||||||
freeze_bn=10000000,
|
|
||||||
quant_delay=(0, 0),
|
|
||||||
quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
|
|
||||||
per_channel=(False, False),
|
|
||||||
symmetric=(False, False),
|
|
||||||
narrow_range=(False, False),
|
|
||||||
optimize_option=OptimizeOption.QAT,
|
|
||||||
one_conv_fold=True):
|
|
||||||
"""Init for QuantizationAwareTraining quantizer"""
|
|
||||||
super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option)
|
|
||||||
|
|
||||||
def convert2list(name, value):
|
|
||||||
if not isinstance(value, list) and not isinstance(value, tuple):
|
|
||||||
value = [value]
|
|
||||||
elif len(value) > 2:
|
|
||||||
raise ValueError("input `{}` len should less then 2".format(name))
|
|
||||||
return value
|
|
||||||
|
|
||||||
quant_delay_list = convert2list("quant delay", quant_delay)
|
|
||||||
quant_dtype_list = convert2list("quant dtype", quant_dtype)
|
|
||||||
per_channel_list = convert2list("per channel", per_channel)
|
|
||||||
symmetric_list = convert2list("symmetric", symmetric)
|
|
||||||
narrow_range_list = convert2list("narrow range", narrow_range)
|
|
||||||
|
|
||||||
self.weight_qdelay = Validator.check_non_negative_int(quant_delay_list[0], "quant delay")
|
|
||||||
self.act_qdelay = Validator.check_int(quant_delay_list[-1], 0, Rel.GE, "quant delay")
|
|
||||||
self.bn_fold = Validator.check_bool(bn_fold, "bn fold")
|
|
||||||
self.freeze_bn = Validator.check_non_negative_int(freeze_bn, "freeze bn")
|
|
||||||
self.weight_dtype = Validator.check_isinstance("weights dtype", quant_dtype_list[0], QuantDtype)
|
|
||||||
self.act_dtype = Validator.check_isinstance("activations dtype", quant_dtype_list[-1], QuantDtype)
|
|
||||||
self.weight_channel = Validator.check_bool(per_channel_list[0], "per channel")
|
|
||||||
self.act_channel = Validator.check_bool(per_channel_list[-1], "per channel")
|
|
||||||
self.weight_symmetric = Validator.check_bool(symmetric_list[0], "symmetric")
|
|
||||||
self.act_symmetric = Validator.check_bool(symmetric_list[-1], "symmetric")
|
|
||||||
self.weight_range = Validator.check_bool(narrow_range_list[0], "narrow range")
|
|
||||||
self.act_range = Validator.check_bool(narrow_range_list[-1], "narrow range")
|
|
||||||
self.one_conv_fold = Validator.check_bool(one_conv_fold, "one conv fold")
|
|
||||||
self._convert_method_map = {nn.Conv2dBnAct: self._convert_conv,
|
|
||||||
nn.DenseBnAct: self._convert_dense}
|
|
||||||
self.mode = "DEFAULT"
|
|
||||||
if OptimizeOption.LEARNED_SCALE in self.optimize_option:
|
|
||||||
self.mode = "LEARNED_SCALE"
|
|
||||||
if not self.weight_symmetric or not self.act_symmetric:
|
|
||||||
raise ValueError("OptimizeOption.LEARNED_SCALE currently only support "
|
|
||||||
"symmetric=(True, True) for quant")
|
|
||||||
if not self.weight_range or not self.act_range:
|
|
||||||
raise ValueError("OptimizeOption.LEARNED_SCALE currently only support narrow_range=(True, True) "
|
|
||||||
"for quant")
|
|
||||||
if self.freeze_bn != 0:
|
|
||||||
raise ValueError("OptimizeOption.LEARNED_SCALE currently only support freeze_bn equal to 0, "
|
|
||||||
"but get freeze_bn={}".format(self.freeze_bn))
|
|
||||||
if self.weight_qdelay != 0 or self.act_qdelay != 0:
|
|
||||||
raise ValueError("OptimizeOption.LEARNED_SCALE currently only support quant_delay=(0, 0)")
|
|
||||||
self.quant_config = create_quant_config(quant_delay=quant_delay_list,
|
|
||||||
quant_dtype=quant_dtype_list,
|
|
||||||
per_channel=per_channel_list,
|
|
||||||
symmetric=symmetric_list,
|
|
||||||
narrow_range=narrow_range_list,
|
|
||||||
mode=self.mode)
|
|
||||||
self.eps = 1e-5
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _convert_op_name(name):
|
|
||||||
pattern = re.compile(r'([A-Z]{1})')
|
|
||||||
name_new = re.sub(pattern, r'_\1', name).lower()
|
|
||||||
if name_new[0] == '_':
|
|
||||||
name_new = name_new[1:]
|
|
||||||
return name_new
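# Illustrative examples (assumed inputs): _convert_op_name("ReduceMean") returns
# "reduce_mean" and _convert_op_name("Add") returns "add".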
|
|
||||||
|
|
||||||
def quantize(self, network):
|
|
||||||
"""
|
|
||||||
Quant API to convert input network to a quantization aware training network.
|
|
||||||
|
|
||||||
Note:
|
|
||||||
Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
network (Cell): network to be quantized.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Cell, a quantization aware training network.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
KeyError: If the `device_target` set in context is not in `support_device`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
logger.warning("The compression module is deprecated and may not be supported in later version, please use "
|
|
||||||
"MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
|
|
||||||
support_device = ["Ascend", "GPU"]
|
|
||||||
if context.get_context('device_target') not in support_device:
|
|
||||||
raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))
|
|
||||||
|
|
||||||
if OptimizeOption.QAT in self.optimize_option or OptimizeOption.LEARNED_SCALE in self.optimize_option:
|
|
||||||
network.update_cell_prefix()
|
|
||||||
network = self._convert_subcells2quant(network)
|
|
||||||
network.update_cell_type("quant")
|
|
||||||
return network
|
|
||||||
|
|
||||||
def _convert_subcells2quant(self, network):
|
|
||||||
"""
|
|
||||||
Convert sub cells such as `Conv2dBnAct` and `DenseBnAct` to quant cells.
|
|
||||||
"""
|
|
||||||
cells = network.name_cells()
|
|
||||||
change = False
|
|
||||||
for name in cells:
|
|
||||||
subcell = cells[name]
|
|
||||||
if subcell == network:
|
|
||||||
continue
|
|
||||||
if isinstance(subcell, (nn.Conv2dBnAct, nn.DenseBnAct)):
|
|
||||||
prefix = subcell.param_prefix
|
|
||||||
new_subcell = self._convert_method_map[type(subcell)](subcell)
|
|
||||||
new_subcell.update_parameters_name(prefix + '.')
|
|
||||||
network.insert_child_to_cell(name, new_subcell)
|
|
||||||
change = True
|
|
||||||
else:
|
|
||||||
self._convert_subcells2quant(subcell)
|
|
||||||
if isinstance(network, nn.SequentialCell) and change:
|
|
||||||
network.cell_list = list(network.cells())
|
|
||||||
|
|
||||||
# Add a FakeQuant OP after ops in the white list, but not after those wrapped in the quantization cells below.
|
|
||||||
if isinstance(network, (nn.FakeQuantWithMinMaxObserver,
|
|
||||||
nn.Conv2dBnFoldQuantOneConv,
|
|
||||||
nn.Conv2dBnFoldQuant,
|
|
||||||
nn.Conv2dBnWithoutFoldQuant,
|
|
||||||
nn.Conv2dQuant,
|
|
||||||
nn.DenseQuant,
|
|
||||||
nn.ActQuant,
|
|
||||||
nn.TensorAddQuant,
|
|
||||||
nn.MulQuant)):
|
|
||||||
return network
|
|
||||||
|
|
||||||
add_list = []
|
|
||||||
for name in network.__dict__:
|
|
||||||
if name[0] == '_':
|
|
||||||
continue
|
|
||||||
attr = network.__dict__[name]
|
|
||||||
if isinstance(attr, ops.Primitive) and attr.name in self.__quant_op_name:
|
|
||||||
add_list.append((name, attr))
|
|
||||||
for name, prim_op in add_list:
|
|
||||||
prefix = name
|
|
||||||
add_quant = _AddFakeQuantAfterSubCell(prim_op,
|
|
||||||
quant_dtype=self.act_dtype,
|
|
||||||
quant_delay=self.act_qdelay,
|
|
||||||
per_channel=self.act_channel,
|
|
||||||
symmetric=self.act_symmetric,
|
|
||||||
narrow_range=self.act_range,
|
|
||||||
optimize_option=self.optimize_option)
|
|
||||||
if network.param_prefix:
|
|
||||||
prefix = '.'.join([network.param_prefix, prefix])
|
|
||||||
add_quant.update_parameters_name(prefix + '.')
|
|
||||||
del network.__dict__[name]
|
|
||||||
network.insert_child_to_cell(name, add_quant)
|
|
||||||
return network
|
|
||||||
|
|
||||||
def _convert_conv(self, subcell):
|
|
||||||
"""
|
|
||||||
Convert a Conv2d cell to a quant cell.
|
|
||||||
"""
|
|
||||||
min_init = -6
|
|
||||||
max_init = 6
|
|
||||||
if self.eps == 0:
|
|
||||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
|
||||||
if OptimizeOption.LEARNED_SCALE in self.optimize_option:
|
|
||||||
subcell_weight_para = subcell.conv.weight.data.asnumpy()
|
|
||||||
if subcell.has_bn:
|
|
||||||
scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
|
|
||||||
np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
|
|
||||||
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
|
|
||||||
min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
|
|
||||||
self.quant_config = self.quant_config._replace(
|
|
||||||
weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))
|
|
||||||
|
|
||||||
conv_inner = subcell.conv
|
|
||||||
if subcell.has_bn:
|
|
||||||
bn_inner = subcell.batchnorm
|
|
||||||
if self.bn_fold:
|
|
||||||
if self.one_conv_fold:
|
|
||||||
conv_inner = quant.Conv2dBnFoldQuantOneConv(conv_inner.in_channels,
|
|
||||||
conv_inner.out_channels,
|
|
||||||
kernel_size=conv_inner.kernel_size,
|
|
||||||
stride=conv_inner.stride,
|
|
||||||
pad_mode=conv_inner.pad_mode,
|
|
||||||
padding=conv_inner.padding,
|
|
||||||
dilation=conv_inner.dilation,
|
|
||||||
group=conv_inner.group,
|
|
||||||
eps=bn_inner.eps,
|
|
||||||
momentum=1 - bn_inner.momentum,
|
|
||||||
has_bias=conv_inner.has_bias,
|
|
||||||
bias_init=conv_inner.bias_init,
|
|
||||||
quant_config=self.quant_config,
|
|
||||||
quant_dtype=self.weight_dtype,
|
|
||||||
fake=True)
|
|
||||||
else:
|
|
||||||
conv_inner = quant.Conv2dBnFoldQuant(conv_inner.in_channels,
|
|
||||||
conv_inner.out_channels,
|
|
||||||
kernel_size=conv_inner.kernel_size,
|
|
||||||
stride=conv_inner.stride,
|
|
||||||
pad_mode=conv_inner.pad_mode,
|
|
||||||
padding=conv_inner.padding,
|
|
||||||
dilation=conv_inner.dilation,
|
|
||||||
group=conv_inner.group,
|
|
||||||
eps=bn_inner.eps,
|
|
||||||
momentum=1 - bn_inner.momentum,
|
|
||||||
has_bias=conv_inner.has_bias,
|
|
||||||
bias_init=conv_inner.bias_init,
|
|
||||||
freeze_bn=self.freeze_bn,
|
|
||||||
quant_config=self.quant_config,
|
|
||||||
quant_dtype=self.weight_dtype,
|
|
||||||
fake=True)
|
|
||||||
# change original network Batch Normalization OP parameters to quant network
|
|
||||||
conv_inner.gamma = subcell.batchnorm.gamma
|
|
||||||
conv_inner.beta = subcell.batchnorm.beta
|
|
||||||
conv_inner.moving_mean = subcell.batchnorm.moving_mean
|
|
||||||
conv_inner.moving_variance = subcell.batchnorm.moving_variance
|
|
||||||
else:
|
|
||||||
conv_inner = quant.Conv2dBnWithoutFoldQuant(conv_inner.in_channels,
|
|
||||||
conv_inner.out_channels,
|
|
||||||
kernel_size=conv_inner.kernel_size,
|
|
||||||
stride=conv_inner.stride,
|
|
||||||
pad_mode=conv_inner.pad_mode,
|
|
||||||
padding=conv_inner.padding,
|
|
||||||
dilation=conv_inner.dilation,
|
|
||||||
group=conv_inner.group,
|
|
||||||
eps=bn_inner.eps,
|
|
||||||
momentum=1 - bn_inner.momentum,
|
|
||||||
has_bias=conv_inner.has_bias,
|
|
||||||
bias_init=conv_inner.bias_init,
|
|
||||||
quant_config=self.quant_config)
|
|
||||||
# change original network Batch Normalization OP parameters to quant network
|
|
||||||
conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
|
|
||||||
conv_inner.batchnorm.beta = subcell.batchnorm.beta
|
|
||||||
conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean
|
|
||||||
conv_inner.batchnorm.moving_variance = subcell.batchnorm.moving_variance
|
|
||||||
del subcell.batchnorm
|
|
||||||
subcell.batchnorm = None
|
|
||||||
subcell.has_bn = False
|
|
||||||
else:
|
|
||||||
conv_inner = quant.Conv2dQuant(conv_inner.in_channels, conv_inner.out_channels,
|
|
||||||
kernel_size=conv_inner.kernel_size, stride=conv_inner.stride,
|
|
||||||
pad_mode=conv_inner.pad_mode, padding=conv_inner.padding,
|
|
||||||
dilation=conv_inner.dilation, group=conv_inner.group,
|
|
||||||
has_bias=conv_inner.has_bias, quant_config=self.quant_config,
|
|
||||||
quant_dtype=self.weight_dtype)
|
|
||||||
# change original network Conv2D OP parameters to quant network
|
|
||||||
conv_inner.weight = subcell.conv.weight
|
|
||||||
if subcell.conv.has_bias:
|
|
||||||
conv_inner.bias = subcell.conv.bias
|
|
||||||
subcell.conv = conv_inner
|
|
||||||
if subcell.has_act and subcell.activation is not None:
|
|
||||||
subcell.activation = self._convert_activation(subcell.activation)
|
|
||||||
elif subcell.after_fake:
|
|
||||||
subcell.has_act = True
|
|
||||||
subcell.activation = _AddFakeQuantAfterSubCell(F.identity, quant_dtype=self.act_dtype,
|
|
||||||
quant_delay=self.act_qdelay, per_channel=self.act_channel,
|
|
||||||
symmetric=self.act_symmetric, narrow_range=self.act_range,
|
|
||||||
optimize_option=self.optimize_option)
|
|
||||||
return subcell
|
|
||||||
|
|
||||||
def _convert_dense(self, subcell):
|
|
||||||
"""
|
|
||||||
Convert a dense cell to a quant cell.
|
|
||||||
"""
|
|
||||||
min_init = -6
|
|
||||||
max_init = 6
|
|
||||||
if self.eps == 0:
|
|
||||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
|
||||||
if OptimizeOption.LEARNED_SCALE in self.optimize_option:
|
|
||||||
subcell_weight_para = subcell.dense.weight.data.asnumpy()
|
|
||||||
if subcell.has_bn:
|
|
||||||
scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
|
|
||||||
np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
|
|
||||||
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
|
|
||||||
min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
|
|
||||||
self.quant_config = self.quant_config._replace(
|
|
||||||
weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))
|
|
||||||
|
|
||||||
dense_inner = subcell.dense
|
|
||||||
dense_inner = quant.DenseQuant(dense_inner.in_channels,
|
|
||||||
dense_inner.out_channels,
|
|
||||||
has_bias=dense_inner.has_bias,
|
|
||||||
quant_config=self.quant_config,
|
|
||||||
quant_dtype=self.weight_dtype)
|
|
||||||
# change original network Dense OP parameters to quant network
|
|
||||||
dense_inner.weight = subcell.dense.weight
|
|
||||||
if subcell.dense.has_bias:
|
|
||||||
dense_inner.bias = subcell.dense.bias
|
|
||||||
subcell.dense = dense_inner
|
|
||||||
if subcell.has_act and subcell.activation is not None:
|
|
||||||
subcell.activation = self._convert_activation(subcell.activation)
|
|
||||||
elif subcell.after_fake:
|
|
||||||
subcell.has_act = True
|
|
||||||
subcell.activation = _AddFakeQuantAfterSubCell(F.identity,
|
|
||||||
quant_dtype=self.act_dtype,
|
|
||||||
quant_delay=self.act_qdelay,
|
|
||||||
per_channel=self.act_channel,
|
|
||||||
symmetric=self.act_symmetric,
|
|
||||||
narrow_range=self.act_range,
|
|
||||||
optimize_option=self.optimize_option)
|
|
||||||
return subcell
|
|
||||||
|
|
||||||
def _convert_activation(self, activation):
|
|
||||||
"""
|
|
||||||
Convert an activation cell to a quant cell.
|
|
||||||
"""
|
|
||||||
act_class = activation.__class__
|
|
||||||
act_list = [nn.ReLU, nn.ReLU6, nn.Sigmoid]
|
|
||||||
act_list_with_fake_before = [nn.LeakyReLU, nn.HSigmoid, nn.HSwish]
|
|
||||||
|
|
||||||
if act_class in act_list:
|
|
||||||
return quant.ActQuant(activation=activation,
|
|
||||||
quant_config=self.quant_config,
|
|
||||||
quant_dtype=self.act_dtype)
|
|
||||||
if act_class in act_list_with_fake_before:
|
|
||||||
return quant.ActQuant(activation=activation,
|
|
||||||
ema=True,
|
|
||||||
fake_before=True,
|
|
||||||
quant_config=self.quant_config,
|
|
||||||
quant_dtype=self.act_dtype)
|
|
||||||
raise ValueError("Unsupported activation in auto quant: ", act_class)
|
|
||||||
|
|
||||||
def _kl_init(self, subcell_weight_para, weight_dtype):
|
|
||||||
"""
|
|
||||||
Calculate the value of max_init and min_init with compute_kl_threshold.
|
|
||||||
"""
|
|
||||||
if self.weight_channel:
|
|
||||||
max_init = [compute_kl_threshold(weight_para_each, weight_dtype)
|
|
||||||
for weight_para_each in subcell_weight_para]
|
|
||||||
min_init = [-x for x in max_init]
|
|
||||||
else:
|
|
||||||
max_init = [compute_kl_threshold(subcell_weight_para, weight_dtype)]
|
|
||||||
min_init = [-x for x in max_init]
|
|
||||||
return min_init, max_init
|
|
||||||
|
|
||||||
def _set_mixed_bits(self, network, strategy):
|
|
||||||
r"""
|
|
||||||
Set the network's quantization strategy. This function is currently only valid for the `LEARNED_SCALE`
|
|
||||||
optimize_option.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
network (Cell): Input network.
|
|
||||||
strategy (list): The quantization strategy for layers that need to be quantized (e.g. [[8], [8],
|
|
||||||
..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the
|
|
||||||
convolution layer is supported.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Cell, a network with mixed bit strategy configured.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
|
|
||||||
"""
|
|
||||||
if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
|
|
||||||
raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
|
|
||||||
"optimize_option.")
|
|
||||||
|
|
||||||
quantizable_idx = []
|
|
||||||
pass_cell = None
|
|
||||||
for i, cell_and_name in enumerate(network.cells_and_names()):
|
|
||||||
cell = cell_and_name[1]
|
|
||||||
if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)) and cell is not pass_cell:
|
|
||||||
quantizable_idx.append(i)
|
|
||||||
|
|
||||||
if len(quantizable_idx) != len(strategy):
|
|
||||||
raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.")
|
|
||||||
|
|
||||||
quantizable_layer_bit_dict = {idx: bit for idx, bit in zip(quantizable_idx, strategy)}
|
|
||||||
type_map = {
|
|
||||||
QuantDtype.INT2.num_bits: QuantDtype.INT2,
|
|
||||||
QuantDtype.INT3.num_bits: QuantDtype.INT3,
|
|
||||||
QuantDtype.INT4.num_bits: QuantDtype.INT4,
|
|
||||||
QuantDtype.INT5.num_bits: QuantDtype.INT5,
|
|
||||||
QuantDtype.INT6.num_bits: QuantDtype.INT6,
|
|
||||||
QuantDtype.INT7.num_bits: QuantDtype.INT7,
|
|
||||||
QuantDtype.INT8.num_bits: QuantDtype.INT8
|
|
||||||
}
|
|
||||||
if self.eps == 0:
|
|
||||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
|
||||||
for i, cell_and_name in enumerate(network.cells_and_names()):
|
|
||||||
cell = cell_and_name[1]
|
|
||||||
if i not in quantizable_idx:
|
|
||||||
continue
|
|
||||||
if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)):
|
|
||||||
cell.weight_dtype = type_map.get(quantizable_layer_bit_dict[i][0])
|
|
||||||
if cell.weight_dtype is None:
|
|
||||||
raise ValueError("Input strategy is invalid: ", quantizable_layer_bit_dict[i][0])
|
|
||||||
if isinstance(cell, nn.Conv2dBnAct):
|
|
||||||
subcell_weight_para = cell.conv.weight.data.asnumpy()
|
|
||||||
if hasattr(cell.conv, 'gamma'):
|
|
||||||
scale_factor = (cell.conv.gamma.data.asnumpy() /
|
|
||||||
np.sqrt(cell.conv.moving_variance.data.asnumpy() + self.eps))
|
|
||||||
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
|
|
||||||
min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
|
|
||||||
cell.conv.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
|
|
||||||
min_init=min_init,
|
|
||||||
max_init=max_init)
|
|
||||||
elif isinstance(cell, nn.DenseBnAct):
|
|
||||||
subcell_weight_para = cell.dense.weight.data.asnumpy()
|
|
||||||
if hasattr(cell.dense, 'gamma'):
|
|
||||||
scale_factor = (cell.dense.gamma.data.asnumpy() /
|
|
||||||
np.sqrt(cell.dense.moving_variance.data.asnumpy() + self.eps))
|
|
||||||
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
|
|
||||||
min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
|
|
||||||
cell.dense.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
|
|
||||||
min_init=min_init,
|
|
||||||
max_init=max_init)
|
|
||||||
return network
|
|
|
@@ -1,462 +0,0 @@
|
||||||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Quantization utils.
|
|
||||||
|
|
||||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
import numpy as np
|
|
||||||
from mindspore._checkparam import Validator
|
|
||||||
from mindspore import log as logger
|
|
||||||
from ... import nn
|
|
||||||
|
|
||||||
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"]
|
|
||||||
|
|
||||||
|
|
||||||
def cal_quantization_params(input_min,
|
|
||||||
input_max,
|
|
||||||
quant_min,
|
|
||||||
quant_max,
|
|
||||||
data_type,
|
|
||||||
symmetric=False):
|
|
||||||
r"""
|
|
||||||
Calculate quantization params for scale and zero point.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
input_min (numpy.ndarray): The dimension of channel or 1.
|
|
||||||
input_max (numpy.ndarray): The dimension of channel or 1.
|
|
||||||
quant_min (int): The minimum quantization integer.
|
|
||||||
quant_max (int): The maximum quantization integer.
|
|
||||||
data_type (numpy type) : Can be numpy int8, numpy uint8.
|
|
||||||
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
scale (numpy.ndarray): quantization param.
|
|
||||||
zero point (numpy.ndarray): quantization param.
|
|
||||||
"""
|
|
||||||
if quant_min == quant_max:
|
|
||||||
raise ValueError("quant_max is equal to quant_min which will lead to divide zero error.")
|
|
||||||
|
|
||||||
input_max = np.maximum(0.0, input_max)
|
|
||||||
input_min = np.minimum(0.0, input_min)
|
|
||||||
|
|
||||||
if input_min.shape != input_max.shape:
|
|
||||||
raise ValueError("input min shape should be equal to input max.")
|
|
||||||
if len(input_min.shape) > 1:
|
|
||||||
raise ValueError("input min and max shape should be one dim.")
|
|
||||||
if (input_min > input_max).all():
|
|
||||||
raise ValueError("input_min min should be less than input max.")
|
|
||||||
if (input_max == input_min).all():
|
|
||||||
return np.ones(input_min.shape), np.zeros(input_min.shape)
|
|
||||||
|
|
||||||
# calculate scale
|
|
||||||
if symmetric:
|
|
||||||
input_max = np.maximum(-input_min, input_max)
|
|
||||||
input_min = -input_max
|
|
||||||
scale = (input_max - input_min) / (quant_max - quant_min)
|
|
||||||
|
|
||||||
# calculate zero point
|
|
||||||
if data_type == np.int8 and symmetric:
|
|
||||||
zp = np.zeros(input_min.shape)
|
|
||||||
else:
|
|
||||||
if (scale == 0.0).any():
|
|
||||||
raise ValueError("scale can not be 0.")
|
|
||||||
zp_double = quant_min - input_min / scale
|
|
||||||
zp = np.floor(zp_double + 0.5)
|
|
||||||
|
|
||||||
return scale, zp
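# Worked example (asymmetric int8, values assumed): input_min=[-1.0], input_max=[3.0],
# quant_min=-128, quant_max=127 gives scale = 4 / 255 ~= 0.0157 and
# zero point = floor(-128 + 1.0 / scale + 0.5) = -64.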
|
|
||||||
|
|
||||||
|
|
||||||
def get_quant_min_max(data_type, num_bits=8, narrow_range=False):
|
|
||||||
"""Calculate quantization params for minimum/maximum quantization integer"""
|
|
||||||
if data_type == np.int8:
|
|
||||||
quant_min = 0 - 2 ** (num_bits - 1)
|
|
||||||
quant_max = 2 ** (num_bits - 1) - 1
|
|
||||||
elif data_type == np.uint8:
|
|
||||||
quant_min = 0
|
|
||||||
quant_max = 2 ** num_bits - 1
|
|
||||||
else:
|
|
||||||
raise ValueError("Unsupported datatype({})".format(data_type))
|
|
||||||
if narrow_range:
|
|
||||||
quant_min = quant_min + 1
|
|
||||||
return quant_min, quant_max
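# For example: np.int8 with num_bits=8 gives (-128, 127), or (-127, 127) when
# narrow_range=True; np.uint8 with num_bits=8 gives (0, 255).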
|
|
||||||
|
|
||||||
|
|
||||||
def weight2int(data, scale, zero_point, quant_min, quant_max):
|
|
||||||
r"""
|
|
||||||
Calculate int8/uint8 weight from fp32. The formula is defined as:
|
|
||||||
|
|
||||||
.. math::
|
|
||||||
\text{int8/uint8} = \text{round}(\text{float} / \text{scale}) + \text{offset}
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data (numpy.ndarray): The dimension of channel or 1. Should be NCHW.
|
|
||||||
scale (numpy.ndarray): The dimension of channel or 1.
|
|
||||||
zero_point (numpy.ndarray): The dimension of channel or 1.
|
|
||||||
quant_min (int): The minimum quantization integer.
|
|
||||||
quant_max (int): The maximum quantization integer.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
weight (numpy.ndarray): The dimension of channel or 1.
|
|
||||||
"""
|
|
||||||
if scale.shape != zero_point.shape:
|
|
||||||
raise ValueError("`scale` and `zero_point` should have the same shape.")
|
|
||||||
if scale.shape[0] == 0:
|
|
||||||
raise ValueError("`scale` and `zero_point` shape should be greater than zero.")
|
|
||||||
if 0 in scale:
|
|
||||||
raise ValueError("Zero exist in `scale` which will lead to divide zero error.")
|
|
||||||
if len(scale.shape) >= 1 and scale.shape[0] > 1:
|
|
||||||
# for perchannel
|
|
||||||
if scale.shape[0] == data.shape[0]:
|
|
||||||
# `Conv2d` or `Dense` op weight
|
|
||||||
shape_list = [-1] + [1] * len(data.shape[1:])
|
|
||||||
scale = scale.reshape(shape_list)
|
|
||||||
zero_point = zero_point.reshape(shape_list)
|
|
||||||
elif scale.shape[0] == data.shape[1]:
|
|
||||||
# `DepthwiseConv2d` op weight
|
|
||||||
shape_list = [1, -1] + [1] * len(data.shape[2:])
|
|
||||||
scale = scale.reshape(shape_list)
|
|
||||||
zero_point = zero_point.reshape(shape_list)
|
|
||||||
else:
|
|
||||||
raise ValueError("Unsupported weight shape({})".format(data.shape))
|
|
||||||
|
|
||||||
weight_int = np.round((data / scale) + zero_point)
|
|
||||||
weight_int[weight_int > quant_max] = quant_max
|
|
||||||
weight_int[weight_int < quant_min] = quant_min
|
|
||||||
return weight_int
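# Worked example (per-layer case, values assumed): data=[0.5, -0.25], scale=[0.01],
# zero_point=[0.0], quant_min=-128, quant_max=127 gives weight_int = [50., -25.].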
|
|
||||||
|
|
||||||
|
|
||||||
def scale_zp_max_min_from_fake_quant_cell(cell, data_type):
|
|
||||||
"""Get calculate quantization params for scale, zero point, max and min from `FakeQuantWithMinMaxObserver`."""
|
|
||||||
minq = cell.minq.data.asnumpy()
|
|
||||||
maxq = cell.maxq.data.asnumpy()
|
|
||||||
# make sure maxq > 0 and minq <= 0
|
|
||||||
if cell.mode == 'LEARNED_SCALE':
|
|
||||||
maxq = np.abs(maxq)
|
|
||||||
minq = -np.abs(minq)
|
|
||||||
quant_min, quant_max = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range)
|
|
||||||
symmetric = cell.symmetric and not cell.neg_trunc
|
|
||||||
scale, zp = cal_quantization_params(
|
|
||||||
minq, maxq,
|
|
||||||
quant_min, quant_max, data_type,
|
|
||||||
symmetric=symmetric)
|
|
||||||
return scale, zp, maxq, minq
|
|
||||||
|
|
||||||
|
|
||||||
def fold_batchnorm(weight, cell_quant):
|
|
||||||
r"""
|
|
||||||
Fold the batchnorm in `Conv2dBnFoldQuant` to weight.
|
|
||||||
|
|
||||||
Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
weight (numpy.ndarray): Weight of `cell_quant`.
|
|
||||||
cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
weight (numpy.ndarray): Folded weight.
|
|
||||||
bias (numpy.ndarray): Folded bias.
|
|
||||||
"""
|
|
||||||
variance = cell_quant.moving_variance.data.asnumpy()
|
|
||||||
mean = cell_quant.moving_mean.data.asnumpy()
|
|
||||||
gamma = cell_quant.gamma.data.asnumpy()
|
|
||||||
beta = cell_quant.beta.data.asnumpy()
|
|
||||||
epsilon = cell_quant.eps
|
|
||||||
if epsilon == 0:
|
|
||||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
|
||||||
sigma = np.sqrt(variance + epsilon)
|
|
||||||
|
|
||||||
if gamma.shape[0] == weight.shape[0]:
|
|
||||||
# `Conv2d` or `Dense` op weight
|
|
||||||
shape_list = [-1] + [1] * len(weight.shape[1:])
|
|
||||||
_gamma = gamma.reshape(shape_list)
|
|
||||||
_sigma = sigma.reshape(shape_list)
|
|
||||||
elif gamma.shape[0] == weight.shape[1]:
|
|
||||||
# `DepthwiseConv2d` op weight
|
|
||||||
shape_list = [1, -1] + [1] * len(weight.shape[2:])
|
|
||||||
_gamma = gamma.reshape(shape_list)
|
|
||||||
_sigma = sigma.reshape(shape_list)
|
|
||||||
else:
|
|
||||||
raise ValueError("Unsupported weight shape({})".format(weight.shape))
|
|
||||||
|
|
||||||
weight = weight * _gamma / _sigma
|
|
||||||
bias = beta - gamma * mean / sigma
|
|
||||||
return weight, bias
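# Numeric sketch of the folding formula above (illustrative values, not taken from any
# checkpoint): for y = gamma * (conv(x) - mean) / sigma + beta with sigma = sqrt(var + eps),
# folding gives w_fold = w * gamma / sigma and b_fold = beta - gamma * mean / sigma.
w = np.ones((1, 1, 3, 3), dtype=np.float32)                      # one output channel
gamma, beta = np.array([2.0]), np.array([0.1])
mean, var, eps = np.array([0.5]), np.array([0.25]), 1e-5
sigma = np.sqrt(var + eps)
w_fold = w * gamma.reshape(-1, 1, 1, 1) / sigma.reshape(-1, 1, 1, 1)
b_fold = beta - gamma * mean / sigma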
|
|
||||||
|
|
||||||
|
|
||||||
def without_fold_batchnorm(weight, cell_quant):
|
|
||||||
r"""
|
|
||||||
Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight.
|
|
||||||
|
|
||||||
Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
weight (numpy.ndarray): Weight of `cell_quant`.
|
|
||||||
cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
weight (numpy.ndarray): weight without batchnorm folded.
|
|
||||||
bias (numpy.ndarray): bias without batchnorm folded.
|
|
||||||
"""
|
|
||||||
variance = cell_quant.batchnorm.moving_variance.data.asnumpy()
|
|
||||||
mean = cell_quant.batchnorm.moving_mean.data.asnumpy()
|
|
||||||
gamma = cell_quant.batchnorm.gamma.data.asnumpy()
|
|
||||||
beta = cell_quant.batchnorm.beta.data.asnumpy()
|
|
||||||
epsilon = cell_quant.batchnorm.eps
|
|
||||||
if epsilon == 0:
|
|
||||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
|
||||||
sigma = np.sqrt(variance + epsilon)
|
|
||||||
|
|
||||||
if gamma.shape[0] == weight.shape[0]:
|
|
||||||
# `Conv2d` or `Dense` op weight
|
|
||||||
shape_list = [-1] + [1] * len(weight.shape[1:])
|
|
||||||
_gamma = gamma.reshape(shape_list)
|
|
||||||
_sigma = sigma.reshape(shape_list)
|
|
||||||
elif gamma.shape[0] == weight.shape[1]:
|
|
||||||
# `DepthwiseConv2d` op weight
|
|
||||||
shape_list = [1, -1] + [1] * len(weight.shape[2:])
|
|
||||||
_gamma = gamma.reshape(shape_list)
|
|
||||||
_sigma = sigma.reshape(shape_list)
|
|
||||||
else:
|
|
||||||
raise ValueError("Unsupported weight shape({})".format(weight.shape))
|
|
||||||
|
|
||||||
weight = weight * _gamma / _sigma
|
|
||||||
bias = beta - gamma * mean / sigma
|
|
||||||
return weight, bias
|
|
||||||
|
|
||||||
|
|
||||||
def compute_kl_threshold(data, bitwidth):
|
|
||||||
r"""
|
|
||||||
Using KL-J Distance to calculate the clip threshold.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
- **data** (numpy.ndarray) - Data observed to calculate the threshold for quantization.
|
|
||||||
- **bitwidth** (QuantDtype) - The datatype of quantization.
|
|
||||||
Outputs:
|
|
||||||
float, the clip threshold calculated from the data.
|
|
||||||
"""
|
|
||||||
data_max = np.abs(data).max()
|
|
||||||
if data_max < 1e-5:
|
|
||||||
return 1e-5
|
|
||||||
hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(0, data_max), density=True)
|
|
||||||
# For the sake of efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode. If it exceeds that
# size, fall back to the default bins config.
|
|
||||||
largest_bin_size = 1024
|
|
||||||
if hist.shape[0] > largest_bin_size:
|
|
||||||
hist, bin_edges = np.histogram(np.abs(data), range=(0, data_max), density=True)
|
|
||||||
sum_ = np.sum(hist)
|
|
||||||
if sum_ == 0:
|
|
||||||
hist = 0
|
|
||||||
else:
|
|
||||||
hist = hist / sum_
|
|
||||||
cumsum = np.cumsum(hist)
|
|
||||||
bit_pow_range = pow(2, int(bitwidth.num_bits) - 1)
|
|
||||||
threshold = []
|
|
||||||
scaling_factor = []
|
|
||||||
kl = []
|
|
||||||
if bit_pow_range + 1 > len(bin_edges) - 1:
|
|
||||||
th_layer_out = bin_edges[-1]
|
|
||||||
return float(th_layer_out)
|
|
||||||
for i in range(bit_pow_range + 1, len(bin_edges), 1):
|
|
||||||
threshold_tmp = (i + 0.5) * (bin_edges[1] - bin_edges[0])
|
|
||||||
threshold = np.concatenate((threshold, [threshold_tmp]))
|
|
||||||
scaling_factor_tmp = threshold_tmp / (bit_pow_range - 1)
|
|
||||||
scaling_factor = np.concatenate((scaling_factor, [scaling_factor_tmp]))
|
|
||||||
# forward interpolation
|
|
||||||
cumsum_tmp = np.copy(cumsum)
|
|
||||||
cumsum_tmp[(i - 1):] = 1
|
|
||||||
fwd_x = np.linspace(0.0, 1.0, bit_pow_range)
|
|
||||||
fwd_xp = np.linspace(0.0, 1.0, i)
|
|
||||||
fwd_fp = cumsum_tmp[:i]
|
|
||||||
forward_interp = np.interp(fwd_x, fwd_xp, fwd_fp)
|
|
||||||
# backward interpolation
|
|
||||||
bwd_x = np.linspace(0.0, 1.0, i)
|
|
||||||
bwd_xp = np.linspace(0.0, 1.0, bit_pow_range)
|
|
||||||
bwd_fp = forward_interp
|
|
||||||
backward_interp = np.interp(bwd_x, bwd_xp, bwd_fp)
|
|
||||||
cumsum_tmp[:i] = backward_interp
|
|
||||||
if 0 in cumsum_tmp:
|
|
||||||
raise ValueError("Zero exist in `cumsum_tmp` which will lead to divide zero error")
|
|
||||||
kl_tmp = np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)) # Kullback-Leibler-J
|
|
||||||
kl = np.concatenate((kl, [kl_tmp]))
|
|
||||||
th_layer_out = threshold[np.argmin(kl)]
|
|
||||||
threshold = float(th_layer_out)
|
|
||||||
if threshold < 1e-5:
|
|
||||||
threshold = 1e-5
|
|
||||||
return threshold
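# Illustrative call (not from the original tests): `bitwidth` only needs to expose a
# `num_bits` attribute, which QuantDtype provides; a SimpleNamespace stand-in is used here
# so that the snippet does not depend on the rest of the package.
from types import SimpleNamespace
calib_data = np.random.normal(0.0, 1.0, size=10000).astype(np.float32)
int8_like = SimpleNamespace(num_bits=8)        # stand-in for QuantDtype.INT8
clip = compute_kl_threshold(calib_data, int8_like)
# `clip` is a positive float no larger than max(|calib_data|).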
|
|
||||||
|
|
||||||
|
|
||||||
def query_quant_layers(network):
|
|
||||||
r"""
|
|
||||||
Query the network's quantization strategy for each quantized layer and print it to the screen. Note that all the
quantization layers are queried before graph compile optimization in graph mode, so some redundant quantized
layers, which do not exist in the practical execution, may appear.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
network (Cell): input network
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> from mindspore.compression.quant import QuantizationAwareTraining
|
|
||||||
>>> from mindspore.compression.quant.quant_utils import query_quant_layers
|
|
||||||
>>> class LeNet5(nn.Cell):
|
|
||||||
... def __init__(self, num_class=10, channel=1):
|
|
||||||
... super(LeNet5, self).__init__()
|
|
||||||
... self.type = "fusion"
|
|
||||||
... self.num_class = num_class
|
|
||||||
...
|
|
||||||
... # change `nn.Conv2d` to `nn.Conv2dBnAct`
|
|
||||||
... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
|
|
||||||
... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
|
|
||||||
... # change `nn.Dense` to `nn.DenseBnAct`
|
|
||||||
... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
|
|
||||||
... self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
|
|
||||||
... self.fc3 = nn.DenseBnAct(84, self.num_class)
|
|
||||||
...
|
|
||||||
... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
|
|
||||||
... self.flatten = nn.Flatten()
|
|
||||||
...
|
|
||||||
... def construct(self, x):
|
|
||||||
... x = self.conv1(x)
|
|
||||||
... x = self.max_pool2d(x)
|
|
||||||
... x = self.conv2(x)
|
|
||||||
... x = self.max_pool2d(x)
|
|
||||||
... x = self.flatten(x)
|
|
||||||
... x = self.fc1(x)
|
|
||||||
... x = self.fc2(x)
|
|
||||||
... x = self.fc3(x)
|
|
||||||
... return x
|
|
||||||
...
|
|
||||||
>>> net = LeNet5()
|
|
||||||
>>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
|
|
||||||
>>> net_qat = quantizer.quantize(net)
|
|
||||||
>>> query_quant_layers(net_qat)
|
|
||||||
conv1.conv.fake_quant_weight INT8
|
|
||||||
conv1.activation.fake_quant_act INT8
|
|
||||||
conv2.conv.fake_quant_weight INT8
|
|
||||||
conv2.activation.fake_quant_act INT8
|
|
||||||
fc1.dense.fake_quant_weight INT8
|
|
||||||
fc1.activation.fake_quant_act INT8
|
|
||||||
fc2.dense.fake_quant_weight INT8
|
|
||||||
fc2.activation.fake_quant_act INT8
|
|
||||||
fc3.dense.fake_quant_weight INT8
|
|
||||||
fc3.activation.fake_quant_act INT8
|
|
||||||
"""
|
|
||||||
network = Validator.check_isinstance("network", network, nn.Cell)
|
|
||||||
tplt = "{0:60}\t{1:10}"
|
|
||||||
for cell_and_name in network.cells_and_names():
|
|
||||||
cell_name = cell_and_name[0]
|
|
||||||
cell = cell_and_name[1]
|
|
||||||
if isinstance(cell, nn.FakeQuantWithMinMaxObserver):
|
|
||||||
logger.info(tplt.format(cell_name, cell.quant_dtype))
|
|
||||||
|
|
||||||
|
|
||||||
def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None):
|
|
||||||
r"""
|
|
||||||
Load fp32 model parameters into quantization model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
quant_model(Cell): Quantization model.
|
|
||||||
params_dict(dict): Parameter dict that stores fp32 parameters.
|
|
||||||
quant_new_params(list): Parameters that exist in quantization network but not in non-quantization
|
|
||||||
network. Default: None.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
TypeError: If `quant_new_params` is not None and is not list.
|
|
||||||
ValueError: If there are parameters in the `quant_model` that are neither in `params_dict`
|
|
||||||
nor in `quant_new_params`.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> import mindspore as ms
|
|
||||||
>>> from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
|
|
||||||
>>> class LeNet5(nn.Cell):
|
|
||||||
... def __init__(self, num_class=10, channel=1):
|
|
||||||
... super(LeNet5, self).__init__()
|
|
||||||
... self.type = "fusion"
|
|
||||||
... self.num_class = num_class
|
|
||||||
...
|
|
||||||
... # change `nn.Conv2d` to `nn.Conv2dBnAct`
|
|
||||||
... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
|
|
||||||
... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
|
|
||||||
... # change `nn.Dense` to `nn.DenseBnAct`
|
|
||||||
... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
|
|
||||||
... self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
|
|
||||||
... self.fc3 = nn.DenseBnAct(84, self.num_class)
|
|
||||||
...
|
|
||||||
... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
|
|
||||||
... self.flatten = nn.Flatten()
|
|
||||||
...
|
|
||||||
... def construct(self, x):
|
|
||||||
... x = self.conv1(x)
|
|
||||||
... x = self.max_pool2d(x)
|
|
||||||
... x = self.conv2(x)
|
|
||||||
... x = self.max_pool2d(x)
|
|
||||||
... x = self.flatten(x)
|
|
||||||
... x = self.fc1(x)
|
|
||||||
... x = self.fc2(x)
|
|
||||||
... x = self.fc3(x)
|
|
||||||
... return x
|
|
||||||
...
|
|
||||||
>>> net = LeNet5()
|
|
||||||
>>> ckpt_file_name = "./checkpoint/LeNet5_noquant-1_32.ckpt"
|
|
||||||
>>> param_dict = ms.load_checkpoint(ckpt_file_name)
|
|
||||||
>>> load_nonquant_param_into_quant_net(net, param_dict)
|
|
||||||
"""
|
|
||||||
if quant_new_params is not None and not isinstance(quant_new_params, list):
|
|
||||||
raise TypeError("quant_new_params must be list or None.")
|
|
||||||
iterable_dict = {
|
|
||||||
'minq': iter(list(filter(lambda item: item[0].endswith('minq'), params_dict.items()))),
|
|
||||||
'maxq': iter(list(filter(lambda item: item[0].endswith('maxq'), params_dict.items()))),
|
|
||||||
'quant_max': iter(list(filter(lambda item: item[0].endswith('quant_max'), params_dict.items())))
|
|
||||||
}
|
|
||||||
for param in params_dict.items():
|
|
||||||
key_name = param[0].split(".")[-1]
|
|
||||||
if key_name not in iterable_dict:
|
|
||||||
iterable_dict[key_name] = iter(list(filter(lambda item, value=key_name: item[0].endswith(value),
|
|
||||||
params_dict.items())))
|
|
||||||
|
|
||||||
for name, param in quant_model.parameters_and_names():
|
|
||||||
key_name = name.split(".")[-1]
|
|
||||||
if key_name not in iterable_dict.keys():
|
|
||||||
if quant_new_params is None or key_name not in quant_new_params:
|
|
||||||
raise ValueError(f"Can't find match parameter in ckpt, param name = {name}")
|
|
||||||
continue
|
|
||||||
value_param = next(iterable_dict[key_name], None)
|
|
||||||
if value_param:
|
|
||||||
param.set_data(value_param[1].data)
|
|
||||||
logger.info(f'init model param {name} with checkpoint param {value_param[0]}')
|
|
||||||
|
|
||||||
# Perform KL_init when learned scale quantization is executed.
|
|
||||||
for cell_and_name in quant_model.cells_and_names():
|
|
||||||
cell = cell_and_name[1]
|
|
||||||
if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant,
|
|
||||||
nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE":
|
|
||||||
subcell_weight_para = cell.weight.data.asnumpy()
|
|
||||||
if hasattr(cell, 'gamma'):
|
|
||||||
scale_factor = (cell.gamma.data.asnumpy() /
|
|
||||||
np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5))
|
|
||||||
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
|
|
||||||
|
|
||||||
if cell.fake_quant_weight.per_channel:
|
|
||||||
max_init = [compute_kl_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype)
|
|
||||||
for weight_para_each in subcell_weight_para]
|
|
||||||
min_init = [-x for x in max_init]
|
|
||||||
else:
|
|
||||||
max_init = [compute_kl_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)]
|
|
||||||
min_init = [-x for x in max_init]
|
|
||||||
|
|
||||||
cell.fake_quant_weight.reset(quant_dtype=cell.fake_quant_weight.quant_dtype,
|
|
||||||
min_init=min_init, max_init=max_init)
|
|
|
@ -1,68 +0,0 @@
|
||||||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Note:
|
|
||||||
Base class of Quantizer. This is an interface that is subject to change or deletion.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
from mindspore._checkparam import Validator
|
|
||||||
|
|
||||||
__all__ = ["OptimizeOption"]
|
|
||||||
|
|
||||||
|
|
||||||
class OptimizeOption(Enum):
|
|
||||||
r"""
|
|
||||||
An enum for the model quantization optimize option, currently only supports `QAT` and `LEARNED_SCALE`.
|
|
||||||
"""
|
|
||||||
# using quantization aware training
|
|
||||||
QAT = "QAT"
|
|
||||||
|
|
||||||
# using the learned scale quantization
|
|
||||||
LEARNED_SCALE = "LEARNED_SCALE"
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return str(self.value)
|
|
||||||
|
|
||||||
|
|
||||||
class Quantizer(ABC):
|
|
||||||
"""
|
|
||||||
Base class of Quantizer. You can implement different kind of quantizer to get different quantization result.
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
This class is an abstract class.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options. Default:
|
|
||||||
OptimizeOption.QAT.
|
|
||||||
"""
|
|
||||||
def __init__(self,
|
|
||||||
optimize_option=OptimizeOption.QAT):
|
|
||||||
if not isinstance(optimize_option, list) and not isinstance(optimize_option, tuple):
|
|
||||||
optimize_option = [optimize_option]
|
|
||||||
for option in optimize_option:
|
|
||||||
option = Validator.check_isinstance("optimize_option", option, OptimizeOption)
|
|
||||||
self.optimize_option = optimize_option
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def quantize(self, network):
|
|
||||||
"""
|
|
||||||
Quant API to convert the input network to a quantization aware training network.
|
|
||||||
Args:
|
|
||||||
network (Cell): network to be quantized.
|
|
||||||
"""
|
|
|
@ -20,7 +20,7 @@ The high-level components(Cells) used to construct the neural network.
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \
|
from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \
|
||||||
image, quant, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
|
image, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
|
||||||
from mindspore.nn.layer.activation import *
|
from mindspore.nn.layer.activation import *
|
||||||
from mindspore.nn.layer.normalization import *
|
from mindspore.nn.layer.normalization import *
|
||||||
from mindspore.nn.layer.container import *
|
from mindspore.nn.layer.container import *
|
||||||
|
@ -32,7 +32,6 @@ from mindspore.nn.layer.basic import *
|
||||||
from mindspore.nn.layer.embedding import *
|
from mindspore.nn.layer.embedding import *
|
||||||
from mindspore.nn.layer.pooling import *
|
from mindspore.nn.layer.pooling import *
|
||||||
from mindspore.nn.layer.image import *
|
from mindspore.nn.layer.image import *
|
||||||
from mindspore.nn.layer.quant import *
|
|
||||||
from mindspore.nn.layer.math import *
|
from mindspore.nn.layer.math import *
|
||||||
from mindspore.nn.layer.combined import *
|
from mindspore.nn.layer.combined import *
|
||||||
from mindspore.nn.layer.timedistributed import *
|
from mindspore.nn.layer.timedistributed import *
|
||||||
|
@ -53,7 +52,6 @@ __all__.extend(basic.__all__)
|
||||||
__all__.extend(embedding.__all__)
|
__all__.extend(embedding.__all__)
|
||||||
__all__.extend(pooling.__all__)
|
__all__.extend(pooling.__all__)
|
||||||
__all__.extend(image.__all__)
|
__all__.extend(image.__all__)
|
||||||
__all__.extend(quant.__all__)
|
|
||||||
__all__.extend(math.__all__)
|
__all__.extend(math.__all__)
|
||||||
__all__.extend(combined.__all__)
|
__all__.extend(combined.__all__)
|
||||||
__all__.extend(timedistributed.__all__)
|
__all__.extend(timedistributed.__all__)
|
||||||
|
|
|
@ -25,7 +25,6 @@ import stat
|
||||||
import threading
|
import threading
|
||||||
from threading import Thread, Lock
|
from threading import Thread, Lock
|
||||||
from collections import defaultdict, OrderedDict
|
from collections import defaultdict, OrderedDict
|
||||||
from functools import wraps
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
@ -52,7 +51,6 @@ from mindspore.common.parameter import Parameter
|
||||||
from mindspore.common.tensor import Tensor
|
from mindspore.common.tensor import Tensor
|
||||||
from mindspore.common._utils import is_shape_unknown
|
from mindspore.common._utils import is_shape_unknown
|
||||||
from mindspore.communication.management import get_rank, get_group_size
|
from mindspore.communication.management import get_rank, get_group_size
|
||||||
from mindspore.compression.export import quant_export
|
|
||||||
from mindspore.experimental import MapParameter
|
from mindspore.experimental import MapParameter
|
||||||
from mindspore.parallel._cell_wrapper import get_allgather_cell
|
from mindspore.parallel._cell_wrapper import get_allgather_cell
|
||||||
from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index
|
from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index
|
||||||
|
@ -1123,12 +1121,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
|
||||||
|
|
||||||
kwargs (dict): Configuration options dictionary.
|
kwargs (dict): Configuration options dictionary.
|
||||||
|
|
||||||
- quant_mode (str): If the network is a quantization aware training network, the quant_mode should
|
|
||||||
be set to "QUANT", else the quant_mode should be set to "NONQUANT".
|
|
||||||
- mean (float): The mean of input data after preprocessing, used for quantizing the first layer of network.
|
|
||||||
Default: 127.5.
|
|
||||||
- std_dev (float): The standard deviation of input data after preprocessing,
|
|
||||||
used for quantizing the first layer of the network. Default: 127.5.
|
|
||||||
- enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32.
|
- enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32.
|
||||||
- enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set.
|
- enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set.
|
||||||
|
|
||||||
|
@ -1192,7 +1184,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
|
||||||
inputs = tuple(inputs_col)
|
inputs = tuple(inputs_col)
|
||||||
|
|
||||||
file_name = os.path.realpath(file_name)
|
file_name = os.path.realpath(file_name)
|
||||||
net = _quant_export(net, *inputs, file_format=file_format, **kwargs)
|
|
||||||
if 'enc_key' in kwargs.keys():
|
if 'enc_key' in kwargs.keys():
|
||||||
kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs)
|
kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs)
|
||||||
_export(net, file_name, file_format, *inputs, **kwargs)
|
_export(net, file_name, file_format, *inputs, **kwargs)
|
||||||
|
@ -1560,62 +1551,6 @@ def _save_dataset_to_mindir(model, dataset):
|
||||||
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False
|
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False
|
||||||
|
|
||||||
|
|
||||||
def quant_mode_manage(func):
|
|
||||||
"""Inherit the quant_mode in old version."""
|
|
||||||
|
|
||||||
@wraps(func)
|
|
||||||
def wrapper(network, *inputs, file_format, **kwargs):
|
|
||||||
if 'quant_mode' not in kwargs:
|
|
||||||
return network
|
|
||||||
quant_mode = kwargs.get('quant_mode')
|
|
||||||
if not isinstance(quant_mode, str):
|
|
||||||
raise TypeError("For 'export', the type of 'quant_mode' should be string, "
|
|
||||||
"but got {}.".format(type(quant_mode)))
|
|
||||||
if quant_mode in ('AUTO', 'MANUAL'):
|
|
||||||
kwargs['quant_mode'] = 'QUANT'
|
|
||||||
return func(network, *inputs, file_format=file_format, **kwargs)
|
|
||||||
|
|
||||||
return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
@quant_mode_manage
|
|
||||||
def _quant_export(network, *inputs, file_format, **kwargs):
|
|
||||||
"""Exports MindSpore quantization predict model to deploy with AIR and MINDIR."""
|
|
||||||
supported_device = ["Ascend", "GPU"]
|
|
||||||
supported_formats = ['AIR', 'MINDIR']
|
|
||||||
quant_mode_formats = ['QUANT', 'NONQUANT']
|
|
||||||
|
|
||||||
quant_mode = kwargs['quant_mode']
|
|
||||||
if quant_mode not in quant_mode_formats:
|
|
||||||
raise KeyError(f"For 'export', the argument 'quant_mode' must be one of {quant_mode_formats}, "
|
|
||||||
f"but got {quant_mode}.")
|
|
||||||
if quant_mode == 'NONQUANT':
|
|
||||||
return network
|
|
||||||
quant_net = copy.deepcopy(network)
|
|
||||||
quant_net._create_time = int(time.time() * 1e9)
|
|
||||||
|
|
||||||
mean = 127.5 if kwargs.get('mean', None) is None else kwargs.get('mean')
|
|
||||||
std_dev = 127.5 if kwargs.get('std_dev', None) is None else kwargs.get('std_dev')
|
|
||||||
mean = Validator.check_value_type("mean", mean, (int, float))
|
|
||||||
std_dev = Validator.check_value_type("std_dev", std_dev, (int, float))
|
|
||||||
|
|
||||||
if context.get_context('device_target') not in supported_device:
|
|
||||||
raise KeyError(f"For 'export', quant export only support {supported_device} device target now, "
|
|
||||||
f"but got {context.get_context('device_target')}")
|
|
||||||
|
|
||||||
if file_format not in supported_formats:
|
|
||||||
raise ValueError(f"For 'export', quant export only support 'file_format' {supported_formats}, "
|
|
||||||
f"but got {file_format}.")
|
|
||||||
|
|
||||||
quant_net.set_train(False)
|
|
||||||
if file_format == "MINDIR":
|
|
||||||
exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs, is_mindir=True)
|
|
||||||
else:
|
|
||||||
exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs)
|
|
||||||
deploy_net = exporter.run()
|
|
||||||
return deploy_net
|
|
||||||
|
|
||||||
|
|
||||||
def parse_print(print_file_name):
|
def parse_print(print_file_name):
|
||||||
"""
|
"""
|
||||||
Parse data file generated by mindspore.ops.Print.
|
Parse data file generated by mindspore.ops.Print.
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Network config settings, used in test_lenet_quant.py.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from easydict import EasyDict as edict
|
|
||||||
|
|
||||||
quant_cfg = edict({
|
|
||||||
'num_classes': 10,
|
|
||||||
'lr': 0.01,
|
|
||||||
'momentum': 0.9,
|
|
||||||
'epoch_size': 10,
|
|
||||||
'batch_size': 64,
|
|
||||||
'buffer_size': 1000,
|
|
||||||
'image_height': 32,
|
|
||||||
'image_width': 32,
|
|
||||||
'keep_checkpoint_max': 10,
|
|
||||||
})
|
|
|
@ -1,60 +0,0 @@
|
||||||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
Produce the dataset
|
|
||||||
"""
|
|
||||||
|
|
||||||
import mindspore.dataset as ds
|
|
||||||
import mindspore.dataset.vision as CV
|
|
||||||
import mindspore.dataset.transforms as C
|
|
||||||
from mindspore.dataset.vision import Inter
|
|
||||||
from mindspore.common import dtype as mstype
|
|
||||||
|
|
||||||
|
|
||||||
def create_dataset(data_path, batch_size=32, repeat_size=1,
|
|
||||||
num_parallel_workers=1):
|
|
||||||
"""
|
|
||||||
create dataset for train or test
|
|
||||||
"""
|
|
||||||
# define dataset
|
|
||||||
mnist_ds = ds.MnistDataset(data_path)
|
|
||||||
|
|
||||||
resize_height, resize_width = 32, 32
|
|
||||||
rescale = 1.0 / 255.0
|
|
||||||
shift = 0.0
|
|
||||||
rescale_nml = 1 / 0.3081
|
|
||||||
shift_nml = -1 * 0.1307 / 0.3081
|
|
||||||
|
|
||||||
# define map operations
|
|
||||||
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode
|
|
||||||
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
|
|
||||||
rescale_op = CV.Rescale(rescale, shift)
|
|
||||||
hwc2chw_op = CV.HWC2CHW()
|
|
||||||
type_cast_op = C.TypeCast(mstype.int32)
|
|
||||||
|
|
||||||
# apply map operations on images
|
|
||||||
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
|
|
||||||
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
|
||||||
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
|
||||||
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
|
||||||
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
|
||||||
|
|
||||||
# apply DatasetOps
|
|
||||||
buffer_size = 10000
|
|
||||||
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script
|
|
||||||
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
|
|
||||||
mnist_ds = mnist_ds.repeat(repeat_size)
|
|
||||||
|
|
||||||
return mnist_ds
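# Illustrative usage (the MNIST path below is hypothetical and must point to an extracted
# MNIST directory on the local machine):
ds_train = create_dataset("/path/to/MNIST/train", batch_size=32, repeat_size=1)
print(ds_train.get_dataset_size())    # number of batches per epoch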
|
|
|
@ -1,58 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""LeNet."""
|
|
||||||
import mindspore.nn as nn
|
|
||||||
|
|
||||||
|
|
||||||
class LeNet5(nn.Cell):
|
|
||||||
"""
|
|
||||||
Lenet network
|
|
||||||
|
|
||||||
Args:
|
|
||||||
num_class (int): Num classes. Default: 10.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tensor, output tensor
|
|
||||||
Examples:
|
|
||||||
>>> LeNet(num_class=10)
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, num_class=10, channel=1):
|
|
||||||
super(LeNet5, self).__init__()
|
|
||||||
self.type = "fusion"
|
|
||||||
self.num_class = num_class
|
|
||||||
|
|
||||||
# change `nn.Conv2d` to `nn.Conv2dBnAct`
|
|
||||||
self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
|
|
||||||
self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
|
|
||||||
# change `nn.Dense` to `nn.DenseBnAct`
|
|
||||||
self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
|
|
||||||
self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
|
|
||||||
self.fc3 = nn.DenseBnAct(84, self.num_class)
|
|
||||||
|
|
||||||
self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
|
|
||||||
self.flatten = nn.Flatten()
|
|
||||||
|
|
||||||
def construct(self, x):
|
|
||||||
x = self.conv1(x)
|
|
||||||
x = self.max_pool2d(x)
|
|
||||||
x = self.conv2(x)
|
|
||||||
x = self.max_pool2d(x)
|
|
||||||
x = self.flatten(x)
|
|
||||||
x = self.fc1(x)
|
|
||||||
x = self.fc2(x)
|
|
||||||
x = self.fc3(x)
|
|
||||||
return x
|
|
|
@ -1,199 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""
|
|
||||||
train and infer lenet quantization network
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import pytest
|
|
||||||
from mindspore import context
|
|
||||||
from mindspore import Tensor
|
|
||||||
from mindspore.common import dtype as mstype
|
|
||||||
import mindspore.nn as nn
|
|
||||||
from mindspore.train.metrics import Accuracy
|
|
||||||
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
|
|
||||||
from mindspore import load_checkpoint, load_param_into_net, export
|
|
||||||
from mindspore.train import Model
|
|
||||||
from mindspore.compression.quant import QuantizationAwareTraining
|
|
||||||
from mindspore.compression.quant.quantizer import OptimizeOption
|
|
||||||
from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
|
|
||||||
from dataset import create_dataset
|
|
||||||
from config import quant_cfg
|
|
||||||
from lenet_fusion import LeNet5 as LeNet5Fusion
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
data_path = "/home/workspace/mindspore_dataset/mnist"
|
|
||||||
lenet_ckpt_path = "/home/workspace/mindspore_dataset/checkpoint/lenet/ckpt_lenet_noquant-10_1875.ckpt"
|
|
||||||
|
|
||||||
def train_lenet_quant(optim_option="QAT"):
|
|
||||||
cfg = quant_cfg
|
|
||||||
ckpt_path = lenet_ckpt_path
|
|
||||||
ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1)
|
|
||||||
step_size = ds_train.get_dataset_size()
|
|
||||||
|
|
||||||
# define fusion network
|
|
||||||
network = LeNet5Fusion(cfg.num_classes)
|
|
||||||
|
|
||||||
# load quantization aware network checkpoint
|
|
||||||
param_dict = load_checkpoint(ckpt_path)
|
|
||||||
load_nonquant_param_into_quant_net(network, param_dict)
|
|
||||||
|
|
||||||
# convert fusion network to quantization aware network
|
|
||||||
if optim_option == "LEARNED_SCALE":
|
|
||||||
quant_optim_options = OptimizeOption.LEARNED_SCALE
|
|
||||||
quantizer = QuantizationAwareTraining(bn_fold=False,
|
|
||||||
per_channel=[True, False],
|
|
||||||
symmetric=[True, True],
|
|
||||||
narrow_range=[True, True],
|
|
||||||
freeze_bn=0,
|
|
||||||
quant_delay=0,
|
|
||||||
one_conv_fold=True,
|
|
||||||
optimize_option=quant_optim_options)
|
|
||||||
else:
|
|
||||||
quantizer = QuantizationAwareTraining(quant_delay=900,
|
|
||||||
bn_fold=False,
|
|
||||||
per_channel=[True, False],
|
|
||||||
symmetric=[True, False])
|
|
||||||
network = quantizer.quantize(network)
|
|
||||||
|
|
||||||
# define network loss
|
|
||||||
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
|
|
||||||
# define network optimization
|
|
||||||
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
|
|
||||||
|
|
||||||
# call back and monitor
|
|
||||||
config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size,
|
|
||||||
keep_checkpoint_max=cfg.keep_checkpoint_max)
|
|
||||||
ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant"+optim_option, config=config_ckpt)
|
|
||||||
|
|
||||||
# define model
|
|
||||||
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
|
|
||||||
|
|
||||||
print("============== Starting Training ==============")
|
|
||||||
model.train(cfg['epoch_size'], ds_train, callbacks=[ckpt_callback, LossMonitor()],
|
|
||||||
dataset_sink_mode=True)
|
|
||||||
print("============== End Training ==============")
|
|
||||||
|
|
||||||
|
|
||||||
def eval_quant(optim_option="QAT"):
|
|
||||||
cfg = quant_cfg
|
|
||||||
ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1)
|
|
||||||
ckpt_path = './ckpt_lenet_quant'+optim_option+'-10_937.ckpt'
|
|
||||||
# define fusion network
|
|
||||||
network = LeNet5Fusion(cfg.num_classes)
|
|
||||||
# convert fusion network to quantization aware network
|
|
||||||
if optim_option == "LEARNED_SCALE":
|
|
||||||
quant_optim_options = OptimizeOption.LEARNED_SCALE
|
|
||||||
quantizer = QuantizationAwareTraining(bn_fold=False,
|
|
||||||
per_channel=[True, False],
|
|
||||||
symmetric=[True, True],
|
|
||||||
narrow_range=[True, True],
|
|
||||||
freeze_bn=0,
|
|
||||||
quant_delay=0,
|
|
||||||
one_conv_fold=True,
|
|
||||||
optimize_option=quant_optim_options)
|
|
||||||
else:
|
|
||||||
quantizer = QuantizationAwareTraining(quant_delay=0,
|
|
||||||
bn_fold=False,
|
|
||||||
freeze_bn=10000,
|
|
||||||
per_channel=[True, False],
|
|
||||||
symmetric=[True, False])
|
|
||||||
network = quantizer.quantize(network)
|
|
||||||
|
|
||||||
# define loss
|
|
||||||
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
|
|
||||||
# define network optimization
|
|
||||||
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
|
|
||||||
|
|
||||||
# call back and monitor
|
|
||||||
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
|
|
||||||
|
|
||||||
# load quantization aware network checkpoint
|
|
||||||
param_dict = load_checkpoint(ckpt_path)
|
|
||||||
not_load_param = load_param_into_net(network, param_dict)
|
|
||||||
if not_load_param:
|
|
||||||
raise ValueError("Load param into net fail!")
|
|
||||||
|
|
||||||
print("============== Starting Testing ==============")
|
|
||||||
acc = model.eval(ds_eval, dataset_sink_mode=True)
|
|
||||||
print("============== {} ==============".format(acc))
|
|
||||||
assert acc['Accuracy'] > 0.98
|
|
||||||
|
|
||||||
|
|
||||||
def export_lenet(optim_option="QAT", file_format="MINDIR"):
|
|
||||||
cfg = quant_cfg
|
|
||||||
# define fusion network
|
|
||||||
network = LeNet5Fusion(cfg.num_classes)
|
|
||||||
# convert fusion network to quantization aware network
|
|
||||||
if optim_option == "LEARNED_SCALE":
|
|
||||||
quant_optim_options = OptimizeOption.LEARNED_SCALE
|
|
||||||
quantizer = QuantizationAwareTraining(bn_fold=False,
|
|
||||||
per_channel=[True, False],
|
|
||||||
symmetric=[True, True],
|
|
||||||
narrow_range=[True, True],
|
|
||||||
freeze_bn=0,
|
|
||||||
quant_delay=0,
|
|
||||||
one_conv_fold=True,
|
|
||||||
optimize_option=quant_optim_options)
|
|
||||||
else:
|
|
||||||
quantizer = QuantizationAwareTraining(quant_delay=0,
|
|
||||||
bn_fold=False,
|
|
||||||
freeze_bn=10000,
|
|
||||||
per_channel=[True, False],
|
|
||||||
symmetric=[True, False])
|
|
||||||
network = quantizer.quantize(network)
|
|
||||||
|
|
||||||
# export network
|
|
||||||
inputs = Tensor(np.ones([1, 1, cfg.image_height, cfg.image_width]), mstype.float32)
|
|
||||||
export(network, inputs, file_name="lenet_quant", file_format=file_format, quant_mode='AUTO')
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level1
|
|
||||||
@pytest.mark.platform_x86_gpu_training
|
|
||||||
@pytest.mark.env_onecard
|
|
||||||
def test_lenet_quant():
|
|
||||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
|
||||||
train_lenet_quant()
|
|
||||||
eval_quant()
|
|
||||||
export_lenet()
|
|
||||||
train_lenet_quant(optim_option="LEARNED_SCALE")
|
|
||||||
eval_quant(optim_option="LEARNED_SCALE")
|
|
||||||
export_lenet(optim_option="LEARNED_SCALE")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level1
|
|
||||||
@pytest.mark.platform_arm_ascend_training
|
|
||||||
@pytest.mark.platform_x86_ascend_training
|
|
||||||
@pytest.mark.env_onecard
|
|
||||||
def test_lenet_quant_ascend():
|
|
||||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
|
|
||||||
train_lenet_quant(optim_option="LEARNED_SCALE")
|
|
||||||
eval_quant(optim_option="LEARNED_SCALE")
|
|
||||||
export_lenet(optim_option="LEARNED_SCALE", file_format="AIR")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level1
|
|
||||||
@pytest.mark.platform_arm_ascend_training
|
|
||||||
@pytest.mark.platform_x86_ascend_training
|
|
||||||
@pytest.mark.env_onecard
|
|
||||||
def test_lenet_quant_ascend_pynative():
|
|
||||||
"""
|
|
||||||
test_lenet_quant_ascend_pynative
|
|
||||||
Features: test_lenet_quant_ascend_pynative
|
|
||||||
Description: test_lenet_quant_ascend_pynative pynative mode
|
|
||||||
Expectation: None
|
|
||||||
"""
|
|
||||||
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
|
|
||||||
train_lenet_quant(optim_option="QAT")
|
|
|
@ -1,67 +0,0 @@
|
||||||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
""" create train dataset. """
|
|
||||||
|
|
||||||
from functools import partial
|
|
||||||
import mindspore.dataset as ds
|
|
||||||
import mindspore.common.dtype as mstype
|
|
||||||
import mindspore.dataset.vision as C
|
|
||||||
import mindspore.dataset.transforms as C2
|
|
||||||
|
|
||||||
|
|
||||||
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
|
|
||||||
"""
|
|
||||||
create a train dataset
|
|
||||||
|
|
||||||
Args:
|
|
||||||
dataset_path(string): the path of dataset.
|
|
||||||
config(EasyDict): the basic config for training
|
|
||||||
repeat_num(int): the repeat times of dataset. Default: 1.
|
|
||||||
batch_size(int): the batch size of dataset. Default: 32.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
dataset
|
|
||||||
"""
|
|
||||||
|
|
||||||
load_func = partial(ds.Cifar10Dataset, dataset_path)
|
|
||||||
cifar_ds = load_func(num_parallel_workers=8, shuffle=False)
|
|
||||||
|
|
||||||
resize_height = config.image_height
|
|
||||||
resize_width = config.image_width
|
|
||||||
rescale = 1.0 / 255.0
|
|
||||||
shift = 0.0
|
|
||||||
|
|
||||||
# define map operations
|
|
||||||
# interpolation default BILINEAR
|
|
||||||
resize_op = C.Resize((resize_height, resize_width))
|
|
||||||
rescale_op = C.Rescale(rescale, shift)
|
|
||||||
normalize_op = C.Normalize(
|
|
||||||
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
|
|
||||||
changeswap_op = C.HWC2CHW()
|
|
||||||
type_cast_op = C2.TypeCast(mstype.int32)
|
|
||||||
|
|
||||||
c_trans = [resize_op, rescale_op, normalize_op, changeswap_op]
|
|
||||||
|
|
||||||
# apply map operations on images
|
|
||||||
cifar_ds = cifar_ds.map(input_columns="label", operations=type_cast_op)
|
|
||||||
cifar_ds = cifar_ds.map(input_columns="image", operations=c_trans)
|
|
||||||
|
|
||||||
# apply batch operations
|
|
||||||
cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
|
|
||||||
|
|
||||||
# apply dataset repeat operation
|
|
||||||
cifar_ds = cifar_ds.repeat(repeat_num)
|
|
||||||
|
|
||||||
return cifar_ds
|
|
|
@ -1,56 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""learning rate generator"""
|
|
||||||
|
|
||||||
import math
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
|
|
||||||
"""
|
|
||||||
generate learning rate array
|
|
||||||
|
|
||||||
Args:
|
|
||||||
global_step(int): total steps of the training
|
|
||||||
lr_init(float): init learning rate
|
|
||||||
lr_end(float): end learning rate
|
|
||||||
lr_max(float): max learning rate
|
|
||||||
warmup_epochs(int): number of warmup epochs
|
|
||||||
total_epochs(int): total epoch of training
|
|
||||||
steps_per_epoch(int): steps of one epoch
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
np.array, learning rate array
|
|
||||||
"""
|
|
||||||
lr_each_step = []
|
|
||||||
total_steps = steps_per_epoch * total_epochs
|
|
||||||
warmup_steps = steps_per_epoch * warmup_epochs
|
|
||||||
for i in range(total_steps):
|
|
||||||
if i < warmup_steps:
|
|
||||||
lr = lr_init + (lr_max - lr_init) * i / warmup_steps
|
|
||||||
else:
|
|
||||||
lr = lr_end + \
|
|
||||||
(lr_max - lr_end) * \
|
|
||||||
(1. + math.cos(math.pi * (i - warmup_steps) /
|
|
||||||
(total_steps - warmup_steps))) / 2.
|
|
||||||
if lr < 0.0:
|
|
||||||
lr = 0.0
|
|
||||||
lr_each_step.append(lr)
|
|
||||||
|
|
||||||
current_step = global_step
|
|
||||||
lr_each_step = np.array(lr_each_step).astype(np.float32)
|
|
||||||
learning_rate = lr_each_step[current_step:]
|
|
||||||
|
|
||||||
return learning_rate
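# Illustrative call (the hyperparameters are made up): warm up for 2 of 10 epochs, then
# decay from lr_max toward lr_end with a cosine schedule.
lr_array = get_lr(global_step=0, lr_init=0.0, lr_end=0.0, lr_max=0.3,
                  warmup_epochs=2, total_epochs=10, steps_per_epoch=100)
# lr_array has 1000 entries; the first 200 ramp up linearly toward 0.3.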
|
|
|
@ -1,263 +0,0 @@
|
||||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ============================================================================
|
|
||||||
"""MobileNetV2 Quant model define"""
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
import mindspore.nn as nn
|
|
||||||
from mindspore.ops import operations as P
|
|
||||||
from mindspore import Tensor
|
|
||||||
|
|
||||||
__all__ = ['mobilenetV2']
|
|
||||||
|
|
||||||
|
|
||||||
def _make_divisible(v, divisor, min_value=None):
|
|
||||||
if min_value is None:
|
|
||||||
min_value = divisor
|
|
||||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
|
||||||
# Make sure that round down does not go down by more than 10%.
|
|
||||||
if new_v < 0.9 * v:
|
|
||||||
new_v += divisor
|
|
||||||
return new_v
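# Quick check of the rounding helper (illustrative): channel counts are rounded to the
# nearest multiple of `divisor`, but never reduced by more than 10%.
assert _make_divisible(32 * 0.75, 8) == 24   # already a multiple of 8
assert _make_divisible(67, 8) == 64          # nearest multiple, still within 10% of 67
assert _make_divisible(9, 8) == 16           # 8 would drop more than 10%, so bump up by one divisor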
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalAvgPooling(nn.Cell):
|
|
||||||
"""
|
|
||||||
Global avg pooling definition.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tensor, output tensor.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> GlobalAvgPooling()
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
super(GlobalAvgPooling, self).__init__()
|
|
||||||
self.mean = P.ReduceMean(keep_dims=False)
|
|
||||||
|
|
||||||
def construct(self, x):
|
|
||||||
x = self.mean(x, (2, 3))
|
|
||||||
return x
|
|
||||||
|
|
||||||
|
|
||||||
class ConvBNReLU(nn.Cell):
|
|
||||||
"""
|
|
||||||
Convolution/Depthwise fused with Batchnorm and ReLU block definition.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
in_planes (int): Input channel.
|
|
||||||
out_planes (int): Output channel.
|
|
||||||
kernel_size (int): Input kernel size.
|
|
||||||
stride (int): Stride size for the first convolutional layer. Default: 1.
|
|
||||||
groups (int): channel group. Convolution is 1 while Depthwise is the input channel. Default: 1.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tensor, output tensor.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
|
|
||||||
super(ConvBNReLU, self).__init__()
|
|
||||||
padding = (kernel_size - 1) // 2
|
|
||||||
self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size,
|
|
||||||
stride=stride,
|
|
||||||
pad_mode='pad',
|
|
||||||
padding=padding,
|
|
||||||
group=groups,
|
|
||||||
has_bn=True,
|
|
||||||
activation='relu')
|
|
||||||
|
|
||||||
def construct(self, x):
|
|
||||||
x = self.conv(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
|
|
||||||
class InvertedResidual(nn.Cell):
|
|
||||||
"""
|
|
||||||
Mobilenetv2 residual block definition.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
inp (int): Input channel.
|
|
||||||
oup (int): Output channel.
|
|
||||||
stride (int): Stride size for the first convolutional layer. Default: 1.
|
|
||||||
expand_ratio (int): expand ratio of the input channel
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tensor, output tensor.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> ResidualBlock(3, 256, 1, 1)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, inp, oup, stride, expand_ratio):
|
|
||||||
super(InvertedResidual, self).__init__()
|
|
||||||
assert stride in [1, 2]
|
|
||||||
|
|
||||||
hidden_dim = int(round(inp * expand_ratio))
|
|
||||||
self.use_res_connect = stride == 1 and inp == oup
|
|
||||||
|
|
||||||
layers = []
|
|
||||||
if expand_ratio != 1:
|
|
||||||
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
|
|
||||||
layers.extend([
|
|
||||||
# dw
|
|
||||||
ConvBNReLU(hidden_dim, hidden_dim,
|
|
||||||
stride=stride, groups=hidden_dim),
|
|
||||||
# pw-linear
|
|
||||||
nn.Conv2dBnAct(hidden_dim, oup, kernel_size=1, stride=1,
|
|
||||||
pad_mode='pad', padding=0, group=1, has_bn=True)
|
|
||||||
])
|
|
||||||
self.conv = nn.SequentialCell(layers)
|
|
||||||
self.add = P.Add()
|
|
||||||
|
|
||||||
def construct(self, x):
|
|
||||||
out = self.conv(x)
|
|
||||||
if self.use_res_connect:
|
|
||||||
out = self.add(out, x)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
class mobilenetV2(nn.Cell):
|
|
||||||
"""
|
|
||||||
mobilenetV2 fusion architecture.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
num_classes (int): number of classes. Default is 1000.
width_mult (float): channel width multiplier. Default is 1.
has_dropout (bool): whether dropout is used. Default is False.
inverted_residual_setting (list): inverted residual settings. Default is None.
round_nearest (int): round the number of channels to a multiple of this value. Default is 8.
|
|
||||||
Returns:
|
|
||||||
Tensor, output tensor.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> mobilenetV2(num_classes=1000)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, num_classes=1000, width_mult=1.,
|
|
||||||
has_dropout=False, inverted_residual_setting=None, round_nearest=8):
|
|
||||||
super(mobilenetV2, self).__init__()
|
|
||||||
block = InvertedResidual
|
|
||||||
input_channel = 32
|
|
||||||
last_channel = 1280
|
|
||||||
# setting of inverted residual blocks
|
|
||||||
self.cfgs = inverted_residual_setting
|
|
||||||
if inverted_residual_setting is None:
|
|
||||||
self.cfgs = [
|
|
||||||
# t, c, n, s
|
|
||||||
[1, 16, 1, 1],
|
|
||||||
[6, 24, 2, 2],
|
|
||||||
[6, 32, 3, 2],
|
|
||||||
[6, 64, 4, 2],
|
|
||||||
[6, 96, 3, 1],
|
|
||||||
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        # building first layer
        input_channel = _make_divisible(
            input_channel * width_mult, round_nearest)
        self.out_channels = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)

        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(
                    block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(
            input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head
        head = ([GlobalAvgPooling(),
                 nn.DenseBnAct(self.out_channels, num_classes,
                               has_bias=True, has_bn=False)
                 ] if not has_dropout else
                [GlobalAvgPooling(),
                 nn.Dropout(0.2),
                 nn.DenseBnAct(self.out_channels, num_classes,
                               has_bias=True, has_bn=False)
                 ])
        self.head = nn.SequentialCell(head)

        # init weights
        self.init_parameters_data()
        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

    def _initialize_weights(self):
        """
        Initialize weights.

        Args:

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        self.init_parameters_data()
        for _, m in self.cells_and_names():
            np.random.seed(1)
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n),
                                            m.weight.data.shape).astype("float32"))
                m.weight.set_data(w)
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.Conv2dBnAct):
                n = m.conv.kernel_size[0] * \
                    m.conv.kernel_size[1] * m.conv.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n),
                                            m.conv.weight.data.shape).astype("float32"))
                m.conv.weight.set_data(w)
                if m.conv.bias is not None:
                    m.conv.bias.set_data(
                        Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_data(
                    Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
                m.beta.set_data(
                    Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_data(Tensor(np.random.normal(
                    0, 0.01, m.weight.data.shape).astype("float32")))
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.DenseBnAct):
                m.dense.weight.set_data(
                    Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32")))
                if m.dense.bias is not None:
                    m.dense.bias.set_data(
                        Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
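The channel arithmetic above leans on _make_divisible, presumably defined earlier in the same file and not shown in this hunk. The sketch below is only an illustration of the conventional MobileNet rounding rule such a helper follows (align to multiples of 8, never drop more than 10% below the scaled value); it is not the removed implementation itself:

# Illustrative sketch only; the real _make_divisible lives earlier in mobilenetV2.py.
def make_divisible_sketch(value, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
    if new_value < 0.9 * value:  # avoid rounding down by more than 10%
        new_value += divisor
    return new_value

# e.g. with width_mult=0.75: 320 * 0.75 = 240, already a multiple of 8, so it stays 240
print(make_divisible_sketch(320 * 0.75))  # 240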
@@ -1,136 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Mobilenetv2_quant on Cifar10"""


import pytest
import numpy as np
from easydict import EasyDict as ed

from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore.common import set_seed

from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropyWithLabelSmooth
from mobilenetV2 import mobilenetV2

config_ascend_quant = ed({
    "num_classes": 10,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 200,
    "step_threshold": 10,
    "data_load_mode": "mindata",
    "epoch_size": 1,
    "start_epoch": 200,
    "warmup_epochs": 1,
    "lr": 0.3,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
})

dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"


def train():
    """train"""
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    network = quantizer.quantize(network)

    # get learning rate
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimization
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    export_time_used = 650
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    assert train_time < export_time_used
    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    test_mobilenetv2_quant
    Features: test_mobilenetv2_quant
    Description: test_mobilenetv2_quant graph mode
    Expectation: None
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    train()


if __name__ == '__main__':
    test_mobilenetv2_quant()
@@ -1,121 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Mobilenetv2_quant gpu on Cifar10"""


import pytest
import numpy as np
from easydict import EasyDict as ed

from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore.common import set_seed

from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropyWithLabelSmooth
from mobilenetV2 import mobilenetV2

config_ascend_quant = ed({
    "num_classes": 10,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 300,
    "step_threshold": 10,
    "data_load_mode": "mindata",
    "epoch_size": 1,
    "start_epoch": 200,
    "warmup_epochs": 1,
    "lr": 0.05,
    "momentum": 0.997,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 300,
    "save_checkpoint_path": "./checkpoint",
})

dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"


@pytest.mark.level2
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[False, False])
    network = quantizer.quantize(network)

    # get learning rate
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimization
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    expect_avg_step_loss = 2.32
    assert avg_step_loss < expect_avg_step_loss
    export_time_used = 960
    assert train_time < export_time_used


if __name__ == '__main__':
    test_mobilenetv2_quant()
@@ -1,120 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 utils"""

import time
import numpy as np

from mindspore.train.callback import Callback
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.loss.loss import LossBase
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype


class Monitor(Callback):
    """
    Monitor loss and time.

    Args:
        lr_init (numpy array): train lr

    Returns:
        None

    Examples:
        >>> Monitor(100, lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None, step_threshold=10):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)
        self.step_threshold = step_threshold
        self.step_mseconds = 50000

    def epoch_begin(self, run_context):
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))
        self.epoch_mseconds = epoch_mseconds

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        self.step_mseconds = min(self.step_mseconds, step_mseconds)
        step_loss = cb_params.net_outputs

        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
            1, cb_params.batch_num, step_loss,
            np.mean(self.losses), self.step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))

        if cb_params.cur_step_num == self.step_threshold:
            run_context.request_stop()


class CrossEntropyWithLabelSmooth(LossBase):
    """
    CrossEntropyWithLabelSmooth.

    Args:
        smooth_factor (float): smooth factor. Default: 0.
        num_classes (int): number of classes. Default: 1000.

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1],
                                    self.on_value, self.off_value)
        out_loss = self.ce(logit, one_hot_label)
        out_loss = self.mean(out_loss, 0)
        return out_loss
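The on_value/off_value pair above is standard label smoothing. The standalone NumPy sketch below (illustrative only, not part of the removed file) reproduces the smoothed target that the OneHot op builds for smooth_factor=0.1 and 10 classes:

# Illustrative only: the smoothed one-hot target used by CrossEntropyWithLabelSmooth.
import numpy as np

smooth_factor, num_classes = 0.1, 10
on_value = 1.0 - smooth_factor                 # 0.9 on the true class
off_value = smooth_factor / (num_classes - 1)  # ~0.0111 on every other class

label = 3
target = np.full(num_classes, off_value, dtype=np.float32)
target[label] = on_value
print(target.sum())  # ~1.0: the target is still a probability distribution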
@@ -22,20 +22,18 @@ from mindspore import nn
 from mindspore import context
 from mindspore import Tensor
 from mindspore.common import set_seed
-from mindspore.compression.quant import create_quant_config


 class Net(nn.Cell):
-    def __init__(self, qconfig):
+    def __init__(self):
         super(Net, self).__init__()
-        self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1),
-                                         pad_mode='valid', quant_config=qconfig)
+        self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), pad_mode='valid')

     def construct(self, x):
         return self.conv(x)


 def test_conv2d_bn_fold_quant():
     set_seed(1)
-    quant_config = create_quant_config()
-    network = Net(quant_config)
+    network = Net()
     inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32))
     label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32))
     opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9)
@@ -44,11 +42,13 @@ def test_conv2d_bn_fold_quant():
     train_network = nn.TrainOneStepCell(net_with_loss, opt)
     train_network.set_train()
     out_loss = train_network(inputs, label)
+    print("------------------", out_loss.asnumpy())
     expect_loss = np.array([0.940427])
     error = np.array([0.1])
     diff = out_loss.asnumpy() - expect_loss
     assert np.all(abs(diff) < error)


 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
@@ -1,67 +0,0 @@
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" create train dataset. """

from functools import partial

import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms as C2
import mindspore.dataset.vision as C


def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    create a train dataset

    Args:
        dataset_path(string): the path of dataset.
        config(EasyDict): the basic config for training
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """

    load_func = partial(ds.Cifar10Dataset, dataset_path)
    data_set = load_func(num_parallel_workers=8, shuffle=False)

    resize_height = config.image_height
    resize_width = config.image_width

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    resize_op = C.Resize((resize_height, resize_width))
    normalize_op = C.Normalize(mean=mean, std=std)
    changeswap_op = C.HWC2CHW()
    c_trans = [resize_op, normalize_op, changeswap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=c_trans, input_columns="image",
                            num_parallel_workers=8)
    data_set = data_set.map(operations=type_cast_op,
                            input_columns="label", num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
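A minimal usage sketch for the helper above; the path is a placeholder and the config carries only the two fields the function actually reads (image_height, image_width), mirroring the removed tests:

# Illustrative usage only; the dataset path is a placeholder.
from easydict import EasyDict as ed
from dataset import create_dataset

config = ed({"image_height": 224, "image_width": 224})
data_set = create_dataset(dataset_path="/path/to/cifar-10-batches-bin/",
                          config=config,
                          repeat_num=1,
                          batch_size=32)
print(data_set.get_dataset_size())  # number of batches per epoch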
@@ -1,93 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""

import math
import numpy as np


def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """
    generate learning rate array

    Args:
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch
        lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default

    Returns:
        np.array, learning rate array
    """
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    if lr_decay_mode == 'steps':
        decay_epoch_index = [0.3 * total_steps,
                             0.6 * total_steps, 0.8 * total_steps]
        for i in range(total_steps):
            if i < decay_epoch_index[0]:
                lr = lr_max
            elif i < decay_epoch_index[1]:
                lr = lr_max * 0.1
            elif i < decay_epoch_index[2]:
                lr = lr_max * 0.01
            else:
                lr = lr_max * 0.001
            lr_each_step.append(lr)
    elif lr_decay_mode == 'poly':
        if warmup_steps != 0:
            inc_each_step = (float(lr_max) - float(lr_init)) / \
                float(warmup_steps)
        else:
            inc_each_step = 0
        for i in range(total_steps):
            if i < warmup_steps:
                lr = float(lr_init) + inc_each_step * float(i)
            else:
                base = (1.0 - (float(i) - float(warmup_steps)) /
                        (float(total_steps) - float(warmup_steps)))
                lr = float(lr_max) * base * base
                if lr < 0.0:
                    lr = 0.0
            lr_each_step.append(lr)
    elif lr_decay_mode == 'cosine':
        decay_steps = total_steps - warmup_steps
        for i in range(total_steps):
            if i < warmup_steps:
                lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
                lr = float(lr_init) + lr_inc * (i + 1)
            else:
                linear_decay = (total_steps - i) / decay_steps
                cosine_decay = 0.5 * \
                    (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps))
                decayed = linear_decay * cosine_decay + 0.00001
                lr = lr_max * decayed
            lr_each_step.append(lr)
    else:
        for i in range(total_steps):
            if i < warmup_steps:
                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
            else:
                lr = lr_max - (lr_max - lr_end) * \
                    (i - warmup_steps) / (total_steps - warmup_steps)
            lr_each_step.append(lr)

    learning_rate = np.array(lr_each_step).astype(np.float32)

    return learning_rate
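As a quick illustration of the schedule this helper produces (numbers below are arbitrary and only for inspection; they are not values used by the tests):

# Illustrative only: inspect the 'cosine' schedule for a tiny 5-epoch run.
from lr_generator import get_lr

lr = get_lr(lr_init=0.0, lr_end=0.0, lr_max=0.1,
            warmup_epochs=1, total_epochs=5, steps_per_epoch=10,
            lr_decay_mode='cosine')
print(len(lr))   # 50 = total_epochs * steps_per_epoch
print(lr[:10])   # linear warm-up from 0.01 up to lr_max
print(lr[-1])    # decays toward (but not exactly) zero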
@@ -1,346 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ResNet."""
import numpy as np
import mindspore.nn as nn
import mindspore.common.initializer as weight_init
from mindspore.ops import operations as P
from mindspore import Tensor
from mindspore.nn import FakeQuantWithMinMaxObserver, Conv2dBnFoldQuant
from mindspore.compression.quant import create_quant_config

_ema_decay = 0.999
_symmetric = True
_fake = True
_per_channel = True
_quant_config = create_quant_config(per_channel=(_per_channel, False), symmetric=(_symmetric, False))


def _weight_variable(shape, factor=0.01):
    init_value = np.random.randn(*shape).astype(np.float32) * factor
    return Tensor(init_value)


def _conv3x3(in_channel, out_channel, stride=1):
    weight_shape = (out_channel, in_channel, 3, 3)
    weight = _weight_variable(weight_shape)
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)


def _conv1x1(in_channel, out_channel, stride=1):
    weight_shape = (out_channel, in_channel, 1, 1)
    weight = _weight_variable(weight_shape)
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)


def _conv7x7(in_channel, out_channel, stride=1):
    weight_shape = (out_channel, in_channel, 7, 7)
    weight = _weight_variable(weight_shape)
    return nn.Conv2d(in_channel, out_channel,
                     kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)


def _bn(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)


def _bn_last(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)


def _fc(in_channel, out_channel):
    weight_shape = (out_channel, in_channel)
    weight = _weight_variable(weight_shape)
    return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)


class ConvBNReLU(nn.Cell):
    """
    Convolution/Depthwise fused with Batchnorm and ReLU block definition.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Input kernel size.
        stride (int): Stride size for the first convolutional layer. Default: 1.
        groups (int): channel group. Convolution is 1 while Depthwise is input channel. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        padding = (kernel_size - 1) // 2
        conv = Conv2dBnFoldQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding,
                                 group=groups, fake=_fake, quant_config=_quant_config)
        layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()]
        self.features = nn.SequentialCell(layers)

    def construct(self, x):
        output = self.features(x)
        return output


class ResidualBlock(nn.Cell):
    """
    ResNet V1 residual block definition.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride size for the first convolutional layer. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(3, 256, stride=2)
    """
    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlock, self).__init__()

        channel = out_channel // self.expansion
        self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1)
        self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride)
        self.conv3 = nn.SequentialCell([Conv2dBnFoldQuant(channel, out_channel, fake=_fake,
                                                          quant_config=_quant_config,
                                                          kernel_size=1, stride=1, pad_mode='same', padding=0),
                                        FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, symmetric=False)
                                        ]) if _fake else Conv2dBnFoldQuant(channel, out_channel, fake=_fake,
                                                                           quant_config=_quant_config,
                                                                           kernel_size=1, stride=1,
                                                                           pad_mode='same', padding=0)

        self.down_sample = False

        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None

        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([Conv2dBnFoldQuant(in_channel, out_channel,
                                                                          quant_config=_quant_config,
                                                                          kernel_size=1, stride=stride,
                                                                          pad_mode='same', padding=0),
                                                        FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay,
                                                                                    symmetric=False)
                                                        ]) if _fake else Conv2dBnFoldQuant(in_channel, out_channel,
                                                                                           fake=_fake,
                                                                                           quant_config=_quant_config,
                                                                                           kernel_size=1,
                                                                                           stride=stride,
                                                                                           pad_mode='same',
                                                                                           padding=0)
        self.add = nn.TensorAddQuant()
        self.relu = P.ReLU()

    def construct(self, x):
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = self.add(out, identity)
        out = self.relu(out)

        return out


class ResNet(nn.Cell):
    """
    ResNet architecture.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        strides (list): Stride size in each layer.
        num_classes (int): The number of classes that the training images belong to.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResNet(ResidualBlock,
        >>>        [3, 4, 6, 3],
        >>>        [64, 256, 512, 1024],
        >>>        [256, 512, 1024, 2048],
        >>>        [1, 2, 2, 2],
        >>>        10)
    """

    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super(ResNet, self).__init__()

        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")

        self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        self.layer1 = self._make_layer(block,
                                       layer_nums[0],
                                       in_channel=in_channels[0],
                                       out_channel=out_channels[0],
                                       stride=strides[0])
        self.layer2 = self._make_layer(block,
                                       layer_nums[1],
                                       in_channel=in_channels[1],
                                       out_channel=out_channels[1],
                                       stride=strides[1])
        self.layer3 = self._make_layer(block,
                                       layer_nums[2],
                                       in_channel=in_channels[2],
                                       out_channel=out_channels[2],
                                       stride=strides[2])
        self.layer4 = self._make_layer(block,
                                       layer_nums[3],
                                       in_channel=in_channels[3],
                                       out_channel=out_channels[3],
                                       stride=strides[3])

        self.mean = P.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, quant_config=_quant_config)
        self.output_fake = nn.FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay)

        # init weights
        self._initialize_weights()

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        """
        Make stage network of ResNet.

        Args:
            block (Cell): Resnet block.
            layer_num (int): Layer number.
            in_channel (int): Input channel.
            out_channel (int): Output channel.
            stride (int): Stride size for the first convolutional layer.

        Returns:
            SequentialCell, the output layer.

        Examples:
            >>> _make_layer(ResidualBlock, 3, 128, 256, 2)
        """
        layers = []

        resnet_block = block(in_channel, out_channel, stride=stride)
        layers.append(resnet_block)

        for _ in range(1, layer_num):
            resnet_block = block(out_channel, out_channel, stride=1)
            layers.append(resnet_block)

        return nn.SequentialCell(layers)

    def construct(self, x):
        x = self.conv1(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        out = self.mean(c5, (2, 3))
        out = self.flatten(out)
        out = self.end_point(out)
        out = self.output_fake(out)
        return out

    def _initialize_weights(self):

        self.init_parameters_data()
        for _, m in self.cells_and_names():
            np.random.seed(1)

            if isinstance(m, nn.Conv2dBnFoldQuant):
                m.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                          m.weight.shape,
                                                          m.weight.dtype))
            elif isinstance(m, nn.DenseQuant):
                m.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                          m.weight.shape,
                                                          m.weight.dtype))
            elif isinstance(m, nn.Conv2dBnWithoutFoldQuant):
                m.weight.set_data(weight_init.initializer(weight_init.Normal(),
                                                          m.weight.shape,
                                                          m.weight.dtype))


def resnet50_quant(class_num=10):
    """
    Get ResNet50 neural network.

    Args:
        class_num (int): Class number.

    Returns:
        Cell, cell instance of ResNet50 neural network.

    Examples:
        >>> net = resnet50_quant(10)
    """
    return ResNet(ResidualBlock,
                  [3, 4, 6, 3],
                  [64, 256, 512, 1024],
                  [256, 512, 1024, 2048],
                  [1, 2, 2, 2],
                  class_num)


def resnet101_quant(class_num=1001):
    """
    Get ResNet101 neural network.

    Args:
        class_num (int): Class number.

    Returns:
        Cell, cell instance of ResNet101 neural network.

    Examples:
        >>> net = resnet101_quant(1001)
    """
    return ResNet(ResidualBlock,
                  [3, 4, 23, 3],
                  [64, 256, 512, 1024],
                  [256, 512, 1024, 2048],
                  [1, 2, 2, 2],
                  class_num)
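A minimal forward-pass sketch for the factory above (illustrative only; it assumes a MindSpore build that still ships the compression package, since the file imports create_quant_config from it, and the 224x224 input size matches the removed test configs):

# Illustrative only: build the fusion network and push one dummy batch through it.
import numpy as np
from mindspore import Tensor
from resnet_quant_manual import resnet50_quant

net = resnet50_quant(class_num=10)
net.set_train(False)
dummy = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
logits = net(dummy)
print(logits.shape)  # (1, 10): one score per CIFAR-10 class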
@@ -1,131 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train Resnet50_quant on Cifar10"""

import pytest
import numpy as np
from easydict import EasyDict as ed

from mindspore import context
from mindspore import Tensor
from mindspore.nn.optim.momentum import Momentum
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore import set_seed

from resnet_quant_manual import resnet50_quant
from dataset import create_dataset
from lr_generator import get_lr
from utils import Monitor, CrossEntropy


config_quant = ed({
    "class_num": 10,
    "batch_size": 128,
    "step_threshold": 20,
    "loss_scale": 1024,
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "epoch_size": 1,
    "pretrained_epoch_size": 90,
    "buffer_size": 1000,
    "image_height": 224,
    "image_width": 224,
    "data_load_mode": "original",
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 50,
    "save_checkpoint_path": "./",
    "warmup_epochs": 0,
    "lr_decay_mode": "cosine",
    "use_label_smooth": True,
    "label_smooth_factor": 0.1,
    "lr_init": 0,
    "lr_max": 0.005,
})

dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_resnet50_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(
        smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
    # loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    net = quantizer.quantize(net)

    # get learning rate
    lr = Tensor(get_lr(lr_init=config.lr_init,
                       lr_end=0.0,
                       lr_max=config.lr_max,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=config.epoch_size,
                       steps_per_epoch=step_size,
                       lr_decay_mode='cosine'))

    # define optimization
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay, config.loss_scale)

    # define model
    # model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)

    callbacks = [monitor]
    model.train(epoch_size, dataset, callbacks=callbacks,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.60
    avg_step_loss = np.mean(np.array(monitor.losses))

    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss


if __name__ == '__main__':
    test_resnet50_quant()
@@ -1,105 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Resnet50 utils"""

import time
import numpy as np

from mindspore.train.callback import Callback
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.loss.loss import LossBase
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype


class Monitor(Callback):
    """
    Monitor loss and time.

    Args:
        lr_init (numpy array): train lr

    Returns:
        None

    Examples:
        >>> Monitor(100, lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None, step_threshold=10):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)
        self.step_threshold = step_threshold

    def epoch_begin(self, run_context):
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))
        self.epoch_mseconds = epoch_mseconds

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
            1, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))

        if cb_params.cur_step_num == self.step_threshold:
            run_context.request_stop()


class CrossEntropy(LossBase):
    """the redefined loss function with SoftmaxCrossEntropyWithLogits"""

    def __init__(self, smooth_factor=0, num_classes=1001):
        super(CrossEntropy, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logit, label):
        one_hot_label = self.onehot(label, F.shape(
            logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, one_hot_label)
        loss = self.mean(loss, 0)
        return loss