forked from mindspore-Ecosystem/mindspore
remove compression and testcases
This commit is contained in:
parent
50fb77d84f
commit
ea0b841653
|
@ -289,7 +289,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
|
||||
|
|
|
@ -164,7 +164,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
|
||||
|
|
|
@ -250,7 +250,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check
|
||||
${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental
|
||||
|
|
|
@ -23,9 +23,6 @@ mindspore.export
|
|||
|
||||
- **kwargs** (dict) - 配置选项字典。
|
||||
|
||||
- **quant_mode** (str) - 如果网络是量化感知训练网络,那么 `quant_mode` 需要设置为"QUANT",否则 `quant_mode` 需要设置为"NONQUANT"。
|
||||
- **mean** (float) - 预处理后输入数据的平均值,用于量化网络的第一层。默认值:127.5。
|
||||
- **std_dev** (float) - 预处理后输入数据的方差,用于量化网络的第一层。默认值:127.5。
|
||||
- **enc_key** (str) - 用于加密的字节类型密钥,有效长度为16、24或者32。
|
||||
- **enc_mode** (Union[str, function]) - 指定加密模式,当设置 `enc_key` 时启用。
|
||||
|
||||
|
|
|
@ -168,8 +168,6 @@ PYBIND11_MODULE(_c_expression, m) {
|
|||
"Get the number of parallel operators.")
|
||||
.def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"),
|
||||
"Get Allreduce Fusion Dictionary.")
|
||||
.def("fetch_info_for_quant_export", &GraphExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"),
|
||||
"Fetch the inputs of Conv or Matmul for quant export.")
|
||||
.def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"),
|
||||
"Build data graph.")
|
||||
.def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"),
|
||||
|
|
|
@ -631,122 +631,6 @@ GraphExecutorPy::~GraphExecutorPy() {
|
|||
ConfigManager::GetInstance().ResetConfig();
|
||||
}
|
||||
|
||||
void GraphExecutorPy::GetWeightInfo(
|
||||
const CNodePtr &root_node, const AnfNodePtr &weight_node,
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const {
|
||||
MS_EXCEPTION_IF_NULL(root_node);
|
||||
MS_EXCEPTION_IF_NULL(fake_quant_table);
|
||||
std::string weight_name;
|
||||
auto x = root_node->input(1);
|
||||
MS_EXCEPTION_IF_NULL(x);
|
||||
if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
|
||||
weight_name = weight_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
|
||||
} else {
|
||||
auto para = weight_node->cast_ptr<Parameter>();
|
||||
MS_EXCEPTION_IF_NULL(para);
|
||||
weight_name = para->name();
|
||||
}
|
||||
// find the fakequant from input
|
||||
int64_t count = 0;
|
||||
const int64_t max_depth = 5;
|
||||
auto is_quant_cnode = [](const AnfNodePtr &node) {
|
||||
return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
|
||||
};
|
||||
while (!is_quant_cnode(x)) {
|
||||
if (count >= max_depth) {
|
||||
break;
|
||||
}
|
||||
auto cnode = x->cast_ptr<CNode>();
|
||||
if (cnode == nullptr || cnode->size() <= 1) {
|
||||
break;
|
||||
}
|
||||
x = cnode->input(1);
|
||||
count += 1;
|
||||
}
|
||||
if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
|
||||
(*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
|
||||
}
|
||||
// get the fakequant parameter minq's name
|
||||
if (!is_quant_cnode(x)) {
|
||||
return;
|
||||
}
|
||||
auto cnode = x->cast_ptr<CNode>();
|
||||
constexpr size_t expect_input_size = 4;
|
||||
if (cnode == nullptr || cnode->IsApply(prim::kPrimLoad) || cnode->size() != expect_input_size) {
|
||||
return;
|
||||
}
|
||||
const size_t fakequant_index = 2;
|
||||
auto fakequant_min_node = cnode->input(fakequant_index);
|
||||
if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
|
||||
return;
|
||||
}
|
||||
std::string fakequant_min_node_name;
|
||||
if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
|
||||
fakequant_min_node_name = fakequant_min_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
|
||||
} else {
|
||||
auto param = fakequant_min_node->cast_ptr<Parameter>();
|
||||
MS_EXCEPTION_IF_NULL(param);
|
||||
fakequant_min_node_name = param->name();
|
||||
}
|
||||
auto quant_op = GetValuePtr<PrimitivePy>(cnode->input(0));
|
||||
if (quant_op == nullptr) {
|
||||
return;
|
||||
}
|
||||
(*fake_quant_table)[weight_name] = std::make_pair(quant_op->adapter(), fakequant_min_node_name);
|
||||
}
|
||||
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> GraphExecutorPy::FetchInfoForQuantExport(
|
||||
const std::string &phase) {
|
||||
FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase << ")!";
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> fake_quant_table;
|
||||
auto filter = [](const AnfNodePtr &node) {
|
||||
return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative));
|
||||
};
|
||||
std::vector<AnfNodePtr> nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter);
|
||||
auto is_quant_cnode = [](const AnfNodePtr &node) {
|
||||
return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
|
||||
IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
|
||||
};
|
||||
const size_t root_node_size = 3;
|
||||
const size_t weight_index = 2;
|
||||
for (const auto &node : nodes) {
|
||||
auto root_node = node->cast<CNodePtr>();
|
||||
if (root_node == nullptr || root_node->size() != root_node_size) {
|
||||
continue;
|
||||
}
|
||||
auto weight = root_node->input(weight_index);
|
||||
if (!is_quant_cnode(weight)) {
|
||||
auto tuple_node = weight->cast_ptr<CNode>();
|
||||
if (tuple_node != nullptr) {
|
||||
auto fake_node = tuple_node->input(1);
|
||||
if (!is_quant_cnode(fake_node)) {
|
||||
continue;
|
||||
} else {
|
||||
weight = fake_node;
|
||||
}
|
||||
}
|
||||
}
|
||||
// get parameter weight's name
|
||||
auto cnode = weight->cast_ptr<CNode>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto weight_node = cnode->input(weight_index);
|
||||
MS_EXCEPTION_IF_NULL(weight_node);
|
||||
if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
|
||||
continue;
|
||||
}
|
||||
GetWeightInfo(root_node, weight_node, &fake_quant_table);
|
||||
}
|
||||
return fake_quant_table;
|
||||
}
|
||||
|
||||
void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) {
|
||||
// save the graph to GraphExecutorPy
|
||||
FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
|
||||
|
|
|
@ -130,9 +130,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
|
|||
void TerminateDebugger();
|
||||
#endif
|
||||
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> FetchInfoForQuantExport(
|
||||
const std::string &phase);
|
||||
|
||||
// Generate a key for mapping function graph
|
||||
py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false);
|
||||
|
||||
|
@ -140,8 +137,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
|
|||
|
||||
private:
|
||||
GraphExecutorPy() = default;
|
||||
void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
|
||||
std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const;
|
||||
void ParallelPostProcess(const string &phase);
|
||||
void GetGeBackendPolicy() const;
|
||||
// filter some pipeline actions according to phase, e.g. when exporting onnx, it is no need to execute actions after
|
||||
|
|
|
@ -161,7 +161,6 @@
|
|||
#include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h"
|
||||
#include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h"
|
||||
|
@ -667,8 +666,6 @@ void AscendUnifyMindIR(const std::shared_ptr<session::KernelGraph> &kernel_graph
|
|||
unify_mindir_pm->AddPass(std::make_shared<opt::MomentumUnifyOutput>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::RMSPropUnifyOutput>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::CenteredRMSPropUnifyOutput>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerLayerGradUnifyMindIR>());
|
||||
unify_mindir_pm->AddPass(std::make_shared<opt::FakeLearnedScaleQuantPerChannelGradUnifyMindIR>());
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
|
||||
|
|
|
@ -1,233 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h"
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
#include "runtime/device/kernel_info.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "utils/trace_base.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerGradD(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
|
||||
const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
|
||||
if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perlayer_grad_d_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDOpName)),
|
||||
lsq_perlayer_grad_inputs[kIndex1], lsq_perlayer_grad_inputs[kIndex2], lsq_perlayer_grad_inputs[kIndex3],
|
||||
lsq_perlayer_grad_inputs[kIndex4]};
|
||||
auto lsq_perlayer_grad_d = NewCNode(lsq_perlayer_grad_d_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d);
|
||||
lsq_perlayer_grad_d->set_scope(lsq_perlayer_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_grad_d.get());
|
||||
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perlayer_grad_node, lsq_perlayer_grad_d);
|
||||
CreateMultipleOutputsOfAnfNode(graph, lsq_perlayer_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
|
||||
lsq_perlayer_grad_d_outputs);
|
||||
}
|
||||
|
||||
void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerReduceGrad(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs);
|
||||
const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs();
|
||||
if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
|
||||
}
|
||||
if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_d_outputs has wrong inputs size, should be "
|
||||
<< kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perlayer_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perlayer_reduce_grad_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradDReduceOpName)),
|
||||
lsq_perlayer_grad_d_outputs[kIndex1]};
|
||||
auto lsq_perlayer_reduce_grad = NewCNode(lsq_perlayer_reduce_grad_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad);
|
||||
lsq_perlayer_reduce_grad->set_scope(lsq_perlayer_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 1UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 1UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_reduce_grad.get());
|
||||
|
||||
(*lsq_perlayer_reduce_grad_outputs).push_back(lsq_perlayer_reduce_grad);
|
||||
}
|
||||
|
||||
void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelGradD(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
|
||||
const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
|
||||
if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perchannel_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perchannel_grad_d_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDOpName)),
|
||||
lsq_perchannel_grad_inputs[kIndex1], lsq_perchannel_grad_inputs[kIndex2], lsq_perchannel_grad_inputs[kIndex3],
|
||||
lsq_perchannel_grad_inputs[kIndex4]};
|
||||
auto lsq_perchannel_grad_d = NewCNode(lsq_perchannel_grad_d_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d);
|
||||
lsq_perchannel_grad_d->set_scope(lsq_perchannel_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL),
|
||||
common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_grad_d.get());
|
||||
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_grad_d);
|
||||
CreateMultipleOutputsOfAnfNode(graph, lsq_perchannel_grad_d, kFakeLearnedScaleQuantGradDOutputNum,
|
||||
lsq_perchannel_grad_d_outputs);
|
||||
}
|
||||
|
||||
void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelReduceGrad(
|
||||
const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs);
|
||||
const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs();
|
||||
if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than "
|
||||
<< kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perchannel_grad_node);
|
||||
}
|
||||
if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_d_outputs has wrong inputs size, should be "
|
||||
<< kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perchannel_grad_inputs.size()
|
||||
<< trace::DumpSourceLines(lsq_perchannel_grad_node);
|
||||
}
|
||||
std::vector<AnfNodePtr> lsq_perchannel_reduce_grad_inputs = {
|
||||
NewValueNode(std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradDReduceOpName)),
|
||||
lsq_perchannel_grad_d_outputs[kIndex1]};
|
||||
auto lsq_perchannel_reduce_grad = NewCNode(lsq_perchannel_reduce_grad_inputs, graph);
|
||||
MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad);
|
||||
lsq_perchannel_reduce_grad->set_scope(lsq_perchannel_grad_node->scope());
|
||||
|
||||
auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 1UL)};
|
||||
auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 1UL)};
|
||||
common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_reduce_grad.get());
|
||||
common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_reduce_grad);
|
||||
(*lsq_perchannel_reduce_grad_outputs).push_back(lsq_perchannel_reduce_grad);
|
||||
}
|
||||
|
||||
const BaseRef FakeLearnedScaleQuantPerLayerGradUnifyMindIR::DefinePattern() const {
|
||||
VarPtr Xs = std::make_shared<SeqVar>();
|
||||
auto prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerLayerGradOpName);
|
||||
return VectorRef({prim, Xs});
|
||||
}
|
||||
|
||||
const AnfNodePtr FakeLearnedScaleQuantPerLayerGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
|
||||
const AnfNodePtr &node, const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
|
||||
MS_EXCEPTION_IF_NULL(primitive);
|
||||
|
||||
std::vector<AnfNodePtr> lsq_perlayer_grad_d_outputs;
|
||||
CreateOutputsOfLSQPerLayerGradD(func_graph, cnode, &lsq_perlayer_grad_d_outputs);
|
||||
if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_grad_d_outputs has wrong inputs size, should be "
|
||||
<< kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size()
|
||||
<< trace::DumpSourceLines(node);
|
||||
}
|
||||
|
||||
std::vector<AnfNodePtr> lsq_perlayer_reduce_grad_outputs;
|
||||
CreateOutputsOfLSQPerLayerReduceGrad(func_graph, cnode, lsq_perlayer_grad_d_outputs,
|
||||
&lsq_perlayer_reduce_grad_outputs);
|
||||
if (lsq_perlayer_reduce_grad_outputs.size() != kSingleOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_reduce_grad_outputs has wrong inputs size, should be "
|
||||
<< kSingleOutputNum << ", but got " << lsq_perlayer_reduce_grad_outputs.size()
|
||||
<< trace::DumpSourceLines(node);
|
||||
}
|
||||
|
||||
std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perlayer_grad_d_outputs[0],
|
||||
lsq_perlayer_reduce_grad_outputs[0]};
|
||||
auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
|
||||
return make_tuple;
|
||||
}
|
||||
|
||||
const BaseRef FakeLearnedScaleQuantPerChannelGradUnifyMindIR::DefinePattern() const {
|
||||
VarPtr Xs = std::make_shared<SeqVar>();
|
||||
auto prim = std::make_shared<Primitive>(kFakeLearnedScaleQuantPerChannelGradOpName);
|
||||
return VectorRef({prim, Xs});
|
||||
}
|
||||
|
||||
const AnfNodePtr FakeLearnedScaleQuantPerChannelGradUnifyMindIR::Process(const FuncGraphPtr &func_graph,
|
||||
const AnfNodePtr &node,
|
||||
const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode);
|
||||
MS_EXCEPTION_IF_NULL(primitive);
|
||||
|
||||
std::vector<AnfNodePtr> lsq_perchannel_grad_d_outputs;
|
||||
CreateOutputsOfLSQPerChannelGradD(func_graph, cnode, &lsq_perchannel_grad_d_outputs);
|
||||
if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_grad_d_outputs has wrong inputs size, should be "
|
||||
<< kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perchannel_grad_d_outputs.size()
|
||||
<< trace::DumpSourceLines(node);
|
||||
}
|
||||
|
||||
std::vector<AnfNodePtr> lsq_perchannel_reduce_grad_outputs;
|
||||
CreateOutputsOfLSQPerChannelReduceGrad(func_graph, cnode, lsq_perchannel_grad_d_outputs,
|
||||
&lsq_perchannel_reduce_grad_outputs);
|
||||
if (lsq_perchannel_reduce_grad_outputs.size() != kSingleOutputNum) {
|
||||
MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_reduce_grad_outputs has wrong inputs size, should be "
|
||||
<< kSingleOutputNum << ", but got " << lsq_perchannel_reduce_grad_outputs.size()
|
||||
<< trace::DumpSourceLines(node);
|
||||
}
|
||||
|
||||
std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perchannel_grad_d_outputs[0],
|
||||
lsq_perchannel_reduce_grad_outputs[0]};
|
||||
auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
|
||||
return make_tuple;
|
||||
}
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
|
@ -1,72 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
|
||||
|
||||
#include <vector>
|
||||
#include "backend/common/optimizer/optimizer.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
constexpr size_t kFakeLearnedScaleQuantGradOutputNum = 2;
|
||||
constexpr size_t kFakeLearnedScaleQuantGradInputNum = 5;
|
||||
constexpr size_t kFakeLearnedScaleQuantGradDOutputNum = 2;
|
||||
constexpr auto kFakeLearnedScaleQuantPerLayerGradOpName = "FakeLearnedScaleQuantPerLayerGrad";
|
||||
constexpr auto kFakeLearnedScaleQuantPerLayerGradDOpName = "FakeLearnedScaleQuantPerLayerGradD";
|
||||
constexpr auto kFakeLearnedScaleQuantPerLayerGradDReduceOpName = "FakeLearnedScaleQuantPerLayerGradDReduce";
|
||||
constexpr auto kFakeLearnedScaleQuantPerChannelGradOpName = "FakeLearnedScaleQuantPerChannelGrad";
|
||||
constexpr auto kFakeLearnedScaleQuantPerChannelGradDOpName = "FakeLearnedScaleQuantPerChannelGradD";
|
||||
constexpr auto kFakeLearnedScaleQuantPerChannelGradDReduceOpName = "FakeLearnedScaleQuantPerChannelGradDReduce";
|
||||
|
||||
constexpr auto kAttrNeg_trunc = "neg_trunc";
|
||||
constexpr auto kAttrChannelAxis = "channel_axis";
|
||||
|
||||
class FakeLearnedScaleQuantPerLayerGradUnifyMindIR : public PatternProcessPass {
|
||||
public:
|
||||
explicit FakeLearnedScaleQuantPerLayerGradUnifyMindIR(bool multigraph = true)
|
||||
: PatternProcessPass("fake_learned_scale_quant_perlayer_grad_unify_mindir", multigraph) {}
|
||||
~FakeLearnedScaleQuantPerLayerGradUnifyMindIR() override = default;
|
||||
const BaseRef DefinePattern() const override;
|
||||
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
|
||||
|
||||
private:
|
||||
void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_grad_d_outputs) const;
|
||||
void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perlayer_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perlayer_reduce_grad_outputs) const;
|
||||
};
|
||||
|
||||
class FakeLearnedScaleQuantPerChannelGradUnifyMindIR : public PatternProcessPass {
|
||||
public:
|
||||
explicit FakeLearnedScaleQuantPerChannelGradUnifyMindIR(bool multigraph = true)
|
||||
: PatternProcessPass("fake_learned_scale_quant_perchannel_grad_unify_mindir", multigraph) {}
|
||||
~FakeLearnedScaleQuantPerChannelGradUnifyMindIR() override = default;
|
||||
const BaseRef DefinePattern() const override;
|
||||
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
|
||||
|
||||
private:
|
||||
void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_grad_d_outputs) const;
|
||||
void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node,
|
||||
const std::vector<AnfNodePtr> &lsq_perchannel_grad_d_outputs,
|
||||
std::vector<AnfNodePtr> *const lsq_perchannel_reduce_grad_outputs) const;
|
||||
};
|
||||
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_
|
|
@ -220,10 +220,6 @@ constexpr const char kNameXlogy[] = "Xlogy";
|
|||
constexpr const char kNameReLUV2[] = "ReLUV2";
|
||||
constexpr const char kNameAccumulateNV2[] = "AccumulateNV2";
|
||||
constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVars[] = "FakeQuantWithMinMaxVars";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVarsGradient[] = "FakeQuantWithMinMaxVarsGradient";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVarsPerChannel[] = "FakeQuantWithMinMaxVarsPerChannel";
|
||||
constexpr const char kNameFakeQuantWithMinMaxVarsPerChannelGradient[] = "FakeQuantWithMinMaxVarsPerChannelGradient";
|
||||
constexpr const char kNameActsULQ[] = "ActsULQ";
|
||||
constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad";
|
||||
constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad";
|
||||
|
|
|
@ -56,41 +56,6 @@ ATTR_MAP(ConfusionMulGrad) = {{"axes", ATTR_DESC(axes, AnyTraits<std::vector<int
|
|||
OUTPUT_MAP(ConfusionMulGrad) = {{0, OUTPUT_DESC(output0)}, {1, OUTPUT_DESC(output1)}};
|
||||
REG_ADPT_DESC(ConfusionMulGrad, kNameConfusionMulGrad, ADPT_DESC(ConfusionMulGrad))
|
||||
|
||||
// FakeQuantWithMinMaxVars
|
||||
INPUT_MAP(FakeQuantWithMinMaxVars) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVars) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVars) = {{0, OUTPUT_DESC(y)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVars, kNameFakeQuantWithMinMaxVars, ADPT_DESC(FakeQuantWithMinMaxVars))
|
||||
|
||||
// FakeQuantWithMinMaxVarsGradient
|
||||
INPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
|
||||
{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVarsGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVarsGradient) = {
|
||||
{0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVarsGradient, kNameFakeQuantWithMinMaxVarsGradient,
|
||||
ADPT_DESC(FakeQuantWithMinMaxVarsGradient))
|
||||
|
||||
// FakeQuantWithMinMaxVarsPerChannel
|
||||
INPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{0, OUTPUT_DESC(y)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel, kNameFakeQuantWithMinMaxVarsPerChannel,
|
||||
ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel))
|
||||
|
||||
// FakeQuantWithMinMaxVarsPerChannelGradient
|
||||
INPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
|
||||
{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}};
|
||||
ATTR_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits<int64_t>())},
|
||||
{"narrow_range", ATTR_DESC(narrow_range, AnyTraits<bool>())}};
|
||||
OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {
|
||||
{0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}};
|
||||
REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient, kNameFakeQuantWithMinMaxVarsPerChannelGradient,
|
||||
ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient))
|
||||
|
||||
// GreaterEqual
|
||||
INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
|
||||
ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP;
|
||||
|
|
|
@ -32,18 +32,6 @@ DECLARE_OP_USE_OUTPUT(AccumulateNV2)
|
|||
DECLARE_OP_ADAPTER(ConfusionMulGrad)
|
||||
DECLARE_OP_USE_OUTPUT(ConfusionMulGrad)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVars)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVars)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsGradient)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsGradient)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannel)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannel)
|
||||
|
||||
DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannelGradient)
|
||||
DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannelGradient)
|
||||
|
||||
DECLARE_OP_ADAPTER(GreaterEqual)
|
||||
DECLARE_OP_USE_OUTPUT(GreaterEqual)
|
||||
|
||||
|
|
|
@ -1487,12 +1487,6 @@ class _CellGraphExecutor:
|
|||
"""
|
||||
self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key)
|
||||
|
||||
def fetch_info_for_quant_export(self, exec_id):
|
||||
"""Get graph proto from pipeline."""
|
||||
if self._graph_executor.has_compiled(exec_id) is False:
|
||||
return None
|
||||
return self._graph_executor.fetch_info_for_quant_export(exec_id)
|
||||
|
||||
|
||||
def ms_memory_recycle():
|
||||
"""
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
approvers:
|
||||
- zhang_xue_tong
|
||||
- jpc_chenjianping
|
||||
- hangangqiang
|
|
@ -1,19 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
MindSpore compression module.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
|
@ -1,24 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Common module for various compression algorithms, now only including datatype definition for quantization.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from mindspore.compression.common.constant import QuantDtype
|
||||
|
||||
__all__ = ["QuantDtype"]
|
|
@ -1,124 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Note:
|
||||
Constant module for compression. This is interface that is subject to change or deletion.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import enum
|
||||
import re
|
||||
from types import DynamicClassAttribute
|
||||
|
||||
|
||||
__all__ = ["QuantDtype"]
|
||||
|
||||
|
||||
@enum.unique
|
||||
class QuantDtype(enum.Enum):
|
||||
"""
|
||||
An enum for quant datatype, contains `INT2` ~ `INT8`, `UINT2` ~ `UINT8`.
|
||||
"""
|
||||
INT2 = "INT2"
|
||||
INT3 = "INT3"
|
||||
INT4 = "INT4"
|
||||
INT5 = "INT5"
|
||||
INT6 = "INT6"
|
||||
INT7 = "INT7"
|
||||
INT8 = "INT8"
|
||||
|
||||
UINT2 = "UINT2"
|
||||
UINT3 = "UINT3"
|
||||
UINT4 = "UINT4"
|
||||
UINT5 = "UINT5"
|
||||
UINT6 = "UINT6"
|
||||
UINT7 = "UINT7"
|
||||
UINT8 = "UINT8"
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}"
|
||||
|
||||
@staticmethod
|
||||
def is_signed(dtype):
|
||||
"""
|
||||
Get whether the quant datatype is signed.
|
||||
|
||||
Args:
|
||||
dtype (QuantDtype): quant datatype.
|
||||
|
||||
Returns:
|
||||
bool, whether the input quant datatype is signed.
|
||||
|
||||
Examples:
|
||||
>>> quant_dtype = QuantDtype.INT8
|
||||
>>> is_signed = QuantDtype.is_signed(quant_dtype)
|
||||
"""
|
||||
return dtype in [QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5,
|
||||
QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8]
|
||||
|
||||
@staticmethod
|
||||
def switch_signed(dtype):
|
||||
"""
|
||||
Switch the signed state of the input quant datatype.
|
||||
|
||||
Args:
|
||||
dtype (QuantDtype): quant datatype.
|
||||
|
||||
Returns:
|
||||
QuantDtype, quant datatype with opposite signed state as the input.
|
||||
|
||||
Examples:
|
||||
>>> quant_dtype = QuantDtype.INT8
|
||||
>>> quant_dtype = QuantDtype.switch_signed(quant_dtype)
|
||||
"""
|
||||
type_map = {
|
||||
QuantDtype.INT2: QuantDtype.UINT2,
|
||||
QuantDtype.INT3: QuantDtype.UINT3,
|
||||
QuantDtype.INT4: QuantDtype.UINT4,
|
||||
QuantDtype.INT5: QuantDtype.UINT5,
|
||||
QuantDtype.INT6: QuantDtype.UINT6,
|
||||
QuantDtype.INT7: QuantDtype.UINT7,
|
||||
QuantDtype.INT8: QuantDtype.UINT8,
|
||||
QuantDtype.UINT2: QuantDtype.INT2,
|
||||
QuantDtype.UINT3: QuantDtype.INT3,
|
||||
QuantDtype.UINT4: QuantDtype.INT4,
|
||||
QuantDtype.UINT5: QuantDtype.INT5,
|
||||
QuantDtype.UINT6: QuantDtype.INT6,
|
||||
QuantDtype.UINT7: QuantDtype.INT7,
|
||||
QuantDtype.UINT8: QuantDtype.INT8
|
||||
}
|
||||
return type_map.get(dtype)
|
||||
|
||||
@DynamicClassAttribute
|
||||
def _value(self):
|
||||
"""The value of the Enum member."""
|
||||
return int(re.search(r"(\d+)", self._value_).group(1))
|
||||
|
||||
@DynamicClassAttribute
|
||||
def num_bits(self):
|
||||
"""
|
||||
Get the num bits of the QuantDtype member.
|
||||
|
||||
Returns:
|
||||
int, the num bits of the QuantDtype member.
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.compression.common import QuantDtype
|
||||
>>> quant_dtype = QuantDtype.INT8
|
||||
>>> num_bits = quant_dtype.num_bits
|
||||
>>> print(num_bits)
|
||||
8
|
||||
"""
|
||||
return self._value
|
|
@ -1,19 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Compression export module.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
|
@ -1,515 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Note:
|
||||
Export for quantization. This is interface that is subject to change or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mindspore import log as logger
|
||||
from mindspore import nn, ops
|
||||
from mindspore._checkparam import Validator
|
||||
from mindspore.common import Tensor
|
||||
from mindspore.common import dtype as mstype
|
||||
from mindspore.common.api import _cell_graph_executor as _executor
|
||||
from mindspore.common.parameter import Parameter
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.nn.layer import quant
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops import functional as F
|
||||
from mindspore.ops.operations import _inner_ops as inner
|
||||
from mindspore.compression.quant import quant_utils
|
||||
from mindspore.compression.quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell
|
||||
|
||||
__all__ = ["ExportToQuantInferNetwork"]
|
||||
|
||||
|
||||
class QuantBlock(Cell):
|
||||
r"""
|
||||
A quant block of Conv/Dense, activation layer for Ascend deploy.
|
||||
|
||||
Calculate Conv or Dense in Int8, with Quant and DeQuant.
|
||||
|
||||
Notes:
|
||||
This block is only for deploy, and not trainable.
|
||||
|
||||
Args:
|
||||
in_channels (int): The number of channels in the input space.
|
||||
out_channels (int): The number of channels in the output space.
|
||||
weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
|
||||
is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
|
||||
bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
|
||||
same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
|
||||
has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
|
||||
activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
|
||||
batchnorm (bool): Specifies to used batchnorm or not. Default: None.
|
||||
activation (string): Specifies activation type. The optional values are as following:
|
||||
'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid',
|
||||
'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None.
|
||||
|
||||
Inputs:
|
||||
- **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
|
||||
|
||||
Outputs:
|
||||
Tensor of shape :math:`(N, out\_channels)`.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
core_op,
|
||||
weight,
|
||||
quant_op,
|
||||
dequant_op,
|
||||
dequant_scale,
|
||||
bias=None,
|
||||
activation=None):
|
||||
super(QuantBlock, self).__init__()
|
||||
self.core_op = core_op
|
||||
self.weight = weight
|
||||
self.quant = quant_op
|
||||
self.dequant = dequant_op
|
||||
self.dequant_scale = dequant_scale
|
||||
self.bias = bias
|
||||
self.has_bias = bias is not None
|
||||
self.activation = activation
|
||||
self.has_act = activation is not None
|
||||
self.bias_add = P.BiasAdd()
|
||||
self.sub = P.Sub()
|
||||
self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset')
|
||||
|
||||
def construct(self, x):
|
||||
x = self.quant(x)
|
||||
if self.has_bias:
|
||||
weight = self.sub(self.weight, self.weight_offset)
|
||||
x = self.core_op(x, weight)
|
||||
x = self.bias_add(x, self.bias)
|
||||
else:
|
||||
x = self.core_op(x, self.weight)
|
||||
x = self.dequant(x, self.dequant_scale)
|
||||
x = F.cast(x, mstype.float32)
|
||||
if self.has_act:
|
||||
x = self.activation(x)
|
||||
return x
|
||||
|
||||
def extend_repr(self):
|
||||
s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
|
||||
if self.has_bias:
|
||||
s += f', bias=shape[{self.bias.shape}]'
|
||||
if self.has_act:
|
||||
s += f', activation={self.activation}'
|
||||
s += f', dequant={self.dequant}'
|
||||
return s
|
||||
|
||||
|
||||
class QuantMindirBlock(Cell):
|
||||
"""A quant binary block of Conv/Dense, activation layer for export MINDIR model.
|
||||
|
||||
Args:
|
||||
core_op (Cell): The operation cell.
|
||||
weight (Tensor): The weight of the cell.
|
||||
bias (Tensor): The bias of the cell. Default: None.
|
||||
activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None.
|
||||
param_dict (dict): The information of the cell.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
core_op,
|
||||
weight,
|
||||
bias=None,
|
||||
activation=None,
|
||||
param_dict=None):
|
||||
|
||||
super(QuantMindirBlock, self).__init__()
|
||||
self.core_op = core_op
|
||||
if activation is not None:
|
||||
self.core_op.add_prim_attr("activation_name", activation.__class__.__name__)
|
||||
self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"]))
|
||||
self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"]))
|
||||
if param_dict["output_maxq"] is not None:
|
||||
self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"]))
|
||||
self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"]))
|
||||
self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"]))
|
||||
if hasattr(core_op, 'pad_mode'):
|
||||
self.core_op.add_prim_attr("pad_mode", core_op.pad_mode)
|
||||
self.core_op.add_prim_attr("act_num_bits", Tensor(8))
|
||||
self.core_op.add_prim_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"]))
|
||||
self.core_op.add_prim_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"]))
|
||||
if param_dict["input_narrow_range"] is not None:
|
||||
self.core_op.add_prim_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"]))
|
||||
if param_dict["output_narrow_range"] is not None:
|
||||
self.core_op.add_prim_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"]))
|
||||
if param_dict["input_maxq"] == 'None':
|
||||
self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"]))
|
||||
self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"]))
|
||||
elif param_dict["input_maxq"] is not None:
|
||||
self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"]))
|
||||
self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"]))
|
||||
|
||||
self.weight = weight
|
||||
self.bias = bias
|
||||
self.has_bias = bias is not None
|
||||
self.activation = activation
|
||||
self.has_act = activation is not None
|
||||
self.bias_add = P.BiasAdd()
|
||||
|
||||
def construct(self, x):
|
||||
if self.has_bias:
|
||||
x = self.core_op(x, self.weight)
|
||||
x = self.bias_add(x, self.bias)
|
||||
else:
|
||||
x = self.core_op(x, self.weight)
|
||||
if self.has_act:
|
||||
x = self.activation(x)
|
||||
return x
|
||||
|
||||
def extend_repr(self):
|
||||
s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]'
|
||||
if self.has_bias:
|
||||
s += f', bias=shape[{self.bias.shape}]'
|
||||
if self.has_act:
|
||||
s += f', activation={self.activation}'
|
||||
return s
|
||||
|
||||
|
||||
class ExportToQuantInferNetwork:
|
||||
"""
|
||||
Convert quantization aware network to infer network.
|
||||
|
||||
Args:
|
||||
network (Cell): MindSpore quantization aware training network.
|
||||
inputs (Tensor): Input tensors of the `quantization aware training network`.
|
||||
mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of network.
|
||||
Default: 127.5.
|
||||
std_dev (int, float): The variance of input data after preprocessing, used for quantizing the first layer
|
||||
of network. Default: 127.5.
|
||||
is_mindir (bool): Whether export MINDIR format. Default: False.
|
||||
|
||||
Returns:
|
||||
Cell, Infer network.
|
||||
"""
|
||||
|
||||
def __init__(self, network, mean, std_dev, *inputs, is_mindir=False):
|
||||
network = Validator.check_isinstance('network', network, (nn.Cell,))
|
||||
self.data_type = mstype.int8
|
||||
self.network = copy.deepcopy(network)
|
||||
self.network_bk = copy.deepcopy(network)
|
||||
self.get_inputs_table(inputs)
|
||||
self.mean = mean
|
||||
self.std_dev = std_dev
|
||||
self.is_mindir = is_mindir
|
||||
self.upcell = None
|
||||
|
||||
@staticmethod
|
||||
def __get_dequant_scale(scale_a_in, scale_w):
|
||||
"""Get dequant scale"""
|
||||
scale_deq = scale_a_in * scale_w
|
||||
|
||||
# fuse parameter
|
||||
# |--------|47:40|--------|39:32|--------|31:0|
|
||||
# offset_w [8] shift_N [8] deq_scale [32]
|
||||
float32_deq_scale = scale_deq.astype(np.float32)
|
||||
uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32)
|
||||
scale_length = scale_deq.size # channel
|
||||
dequant_param = np.zeros(scale_length, dtype=np.uint64)
|
||||
for index in range(scale_length):
|
||||
dequant_param[index] += uint32_deq_scale[index]
|
||||
scale_deq = Tensor(dequant_param, mstype.uint64)
|
||||
return scale_deq
|
||||
|
||||
def get_inputs_table(self, inputs):
|
||||
"""Get the input quantization parameters of quantization cell for quant export."""
|
||||
phase_name = 'export_quant'
|
||||
graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False)
|
||||
self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id)
|
||||
|
||||
def run(self):
|
||||
"""Start to convert."""
|
||||
logger.warning("The compression module is deprecated and may not be supported in later version, please use "
|
||||
"MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
|
||||
self.network.update_cell_prefix()
|
||||
network = self.network
|
||||
if isinstance(network, _AddFakeQuantInput):
|
||||
network = network.network
|
||||
network = self._convert_quant2deploy(network)
|
||||
return network
|
||||
|
||||
def _get_quant_block(self, cell_core, activation, fake_quant_a_out):
|
||||
"""convert network's quant subcell to deploy subcell"""
|
||||
scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out)
|
||||
|
||||
# Build the `Quant` `Dequant` op.
|
||||
# Quant only support perlayer version. Need check here.
|
||||
if float(scale_a_in) == 0:
|
||||
raise ValueError("If `scale_a_in` is zero, will lead to zero error.")
|
||||
quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in))
|
||||
scale_deq = self.__get_dequant_scale(scale_a_in, scale_w)
|
||||
dequant_op = inner.Dequant()
|
||||
|
||||
if isinstance(activation, _AddFakeQuantAfterSubCell):
|
||||
activation = activation.subcell
|
||||
elif hasattr(activation, "get_origin"):
|
||||
activation = activation.get_origin()
|
||||
|
||||
# get op
|
||||
if isinstance(cell_core, quant.DenseQuant):
|
||||
op_core = P.MatMul()
|
||||
else:
|
||||
op_core = cell_core.conv
|
||||
|
||||
# get the `weight` and `bias`
|
||||
weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w)
|
||||
|
||||
if self.is_mindir:
|
||||
block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict)
|
||||
else:
|
||||
block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
|
||||
return block
|
||||
|
||||
def _get_input_quant_param(self, minq_name, np_type, param_dict):
|
||||
"""get input quant parameter for quant block"""
|
||||
fake_quant_a_in_prefix = minq_name[:-5]
|
||||
cells = self.network_bk.cells_and_names()
|
||||
for cell in cells:
|
||||
if cell[0].endswith(fake_quant_a_in_prefix):
|
||||
fake_quant_a_in = cell[1]
|
||||
break
|
||||
scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
|
||||
quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type)
|
||||
param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range
|
||||
return scale_a_in, zp_a_in
|
||||
|
||||
def __get_quant_param(self, cell_core, fake_quant_a_out):
|
||||
"""get parameter for quant block"""
|
||||
w_minq_name = cell_core.fake_quant_weight.minq.name
|
||||
w_maxq_name = cell_core.fake_quant_weight.maxq.name
|
||||
np_type = mstype.dtype_to_nptype(self.data_type)
|
||||
param_dict = dict()
|
||||
param_dict["filter_maxq"] = None
|
||||
param_dict["filter_minq"] = None
|
||||
param_dict["output_maxq"] = None
|
||||
param_dict["output_minq"] = None
|
||||
param_dict["input_maxq"] = None
|
||||
param_dict["input_minq"] = None
|
||||
param_dict["input_narrow_range"] = None
|
||||
param_dict["output_narrow_range"] = None
|
||||
param_dict["weight_narrow_range"] = cell_core.fake_quant_weight.narrow_range
|
||||
param_dict["mean"] = self.mean
|
||||
param_dict["std_dev"] = self.std_dev
|
||||
param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric
|
||||
param_dict["weight_num_bits"] = cell_core.fake_quant_weight.num_bits
|
||||
|
||||
scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \
|
||||
quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type)
|
||||
if fake_quant_a_out is not None:
|
||||
_, _, param_dict["output_maxq"], param_dict["output_minq"] = \
|
||||
quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type)
|
||||
param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range
|
||||
|
||||
info = self.quant_info_table.get(w_minq_name, None)
|
||||
if not info:
|
||||
info = self.quant_info_table.get(w_maxq_name, None)
|
||||
if info:
|
||||
_, minq_name = info
|
||||
if minq_name == 'input':
|
||||
scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \
|
||||
(1 / self.std_dev), round(self.mean), 'None', 'None'
|
||||
else:
|
||||
scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict)
|
||||
else:
|
||||
# skip quant layer
|
||||
scale_a_in, zp_a_in = 1.0, 0.0
|
||||
return scale_a_in, zp_a_in, scale_w, zp_w, param_dict
|
||||
|
||||
def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w):
|
||||
"""Get weight and bias for quantizaiton"""
|
||||
np_type = mstype.dtype_to_nptype(self.data_type)
|
||||
weight = cell_core.weight.data.asnumpy()
|
||||
bias = None
|
||||
if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)):
|
||||
if cell_core.has_bias:
|
||||
bias = cell_core.bias.data.asnumpy()
|
||||
elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)):
|
||||
weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
|
||||
elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant):
|
||||
weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core)
|
||||
weight_b = weight
|
||||
bias_b = bias
|
||||
# apply the quant
|
||||
quant_min, quant_max = quant_utils.get_quant_min_max(np_type,
|
||||
cell_core.fake_quant_weight.num_bits,
|
||||
cell_core.fake_quant_weight.narrow_range)
|
||||
weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max)
|
||||
if bias is not None:
|
||||
if 0 in scale_a_in:
|
||||
raise ValueError("Zero exist in `scale_a_in` which will lead to divide zero error.")
|
||||
if 0 in scale_w:
|
||||
raise ValueError("Zero exist in `scale_w` which will lead to divide zero error.")
|
||||
bias = Tensor(bias / scale_a_in / scale_w, mstype.int32)
|
||||
|
||||
if isinstance(cell_core, quant.DenseQuant):
|
||||
weight = np.transpose(weight)
|
||||
weight_b = np.transpose(weight_b)
|
||||
|
||||
weight_tensor = Tensor(weight, self.data_type)
|
||||
weight_b_tensor = Tensor(weight_b)
|
||||
if bias_b is not None:
|
||||
bias_b_tensor = Tensor(bias_b, mstype.float32)
|
||||
return weight_tensor, bias, weight_b_tensor, bias_b_tensor
|
||||
return weight_tensor, bias, weight_b_tensor, None
|
||||
|
||||
def _add_output_min_max_for_op(self, origin_op, fake_quant_cell):
|
||||
"""add output quant info for quant op for export mindir."""
|
||||
if self.is_mindir:
|
||||
if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'):
|
||||
np_type = mstype.dtype_to_nptype(self.data_type)
|
||||
_, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type)
|
||||
origin_op.add_prim_attr('output_maxq', Tensor(maxq))
|
||||
origin_op.add_prim_attr('output_minq', Tensor(minq))
|
||||
|
||||
def _convert_subcell(self, network, change, name, subcell):
|
||||
"""Convert subcell to ant subcell."""
|
||||
if subcell is not None and hasattr(subcell, "fake_quant_weight"):
|
||||
new_subcell = self._get_quant_block(subcell, None, None)
|
||||
prefix = subcell.param_prefix
|
||||
new_subcell.update_parameters_name(prefix + '.')
|
||||
self.upcell = new_subcell
|
||||
network.insert_child_to_cell(name, new_subcell)
|
||||
change = True
|
||||
return network, change
|
||||
|
||||
def _convert_conv(self, network, change, name, subcell):
|
||||
"""Convert subcell to ant subcell for conv."""
|
||||
cell_core = subcell.conv
|
||||
activation = subcell.activation
|
||||
fake_quant_act = None
|
||||
if hasattr(activation, 'fake_quant_act_before'):
|
||||
fake_quant_act = activation.fake_quant_act_before
|
||||
elif hasattr(activation, 'fake_quant_act'):
|
||||
fake_quant_act = activation.fake_quant_act
|
||||
if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
|
||||
new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
|
||||
self.upcell = None
|
||||
prefix = subcell.param_prefix
|
||||
new_subcell.update_parameters_name(prefix + '.')
|
||||
network.insert_child_to_cell(name, new_subcell)
|
||||
change = True
|
||||
return network, change
|
||||
|
||||
def _convert_dense(self, network, change, name, subcell):
|
||||
"""Convert subcell to ant subcell for dense."""
|
||||
cell_core = subcell.dense
|
||||
activation = subcell.activation
|
||||
fake_quant_act = None
|
||||
if hasattr(activation, 'fake_quant_act_before'):
|
||||
fake_quant_act = activation.fake_quant_act_before
|
||||
elif hasattr(activation, 'fake_quant_act'):
|
||||
fake_quant_act = activation.fake_quant_act
|
||||
if cell_core is not None and hasattr(cell_core, "fake_quant_weight"):
|
||||
new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act)
|
||||
prefix = subcell.param_prefix
|
||||
new_subcell.update_parameters_name(prefix + '.')
|
||||
network.insert_child_to_cell(name, new_subcell)
|
||||
self.upcell = None
|
||||
change = True
|
||||
return network, change
|
||||
|
||||
def _convert_act(self, subcell):
|
||||
"""Convert subcell to ant subcell for activation."""
|
||||
activation = subcell.get_origin()
|
||||
if isinstance(activation, nn.ReLU):
|
||||
self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act)
|
||||
elif isinstance(activation, nn.ReLU6):
|
||||
self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act)
|
||||
if self.upcell:
|
||||
self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act)
|
||||
return activation
|
||||
|
||||
def _convert_add(self, subcell):
|
||||
"""Convert subcell to ant subcell for add."""
|
||||
if isinstance(subcell.add, _AddFakeQuantAfterSubCell):
|
||||
add_op = subcell.add.subcell
|
||||
subcell.__delattr__("add")
|
||||
subcell.__setattr__("add", add_op)
|
||||
add_op = subcell.add
|
||||
self._add_output_min_max_for_op(add_op, subcell.fake_quant_act)
|
||||
subcell.__delattr__("fake_quant_act")
|
||||
subcell.__setattr__("fake_quant_act", P.identity())
|
||||
|
||||
def _convert_observer(self, network, name, subcell):
|
||||
"""Convert subcell to ant subcell for FakeQuantWithMinMaxObserver."""
|
||||
if self.upcell:
|
||||
self._add_output_min_max_for_op(self.upcell.core_op, subcell)
|
||||
network.__delattr__(name)
|
||||
network.__setattr__(name, P.identity())
|
||||
|
||||
def _convert_fake_quant_after_cell(self, network, name, subcell):
|
||||
"""Convert subcell to ant subcell for _AddFakeQuantAfterSubCell."""
|
||||
op = subcell.subcell
|
||||
self._add_output_min_max_for_op(op, subcell.fake_quant_act)
|
||||
network.__delattr__(name)
|
||||
network.__setattr__(name, op)
|
||||
|
||||
def _convert_core_quant_subcell(self, network, change, name, subcell):
|
||||
"""Convert subcell to ant subcell for conv and dense."""
|
||||
is_core_subcell = True
|
||||
if isinstance(subcell, nn.Conv2dBnAct):
|
||||
network, change = self._convert_conv(network, change, name, subcell)
|
||||
elif isinstance(subcell, nn.DenseBnAct):
|
||||
network, change = self._convert_dense(network, change, name, subcell)
|
||||
elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv,
|
||||
quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)):
|
||||
network, change = self._convert_subcell(network, change, name, subcell)
|
||||
else:
|
||||
is_core_subcell = False
|
||||
return is_core_subcell, network, change
|
||||
|
||||
def _convert_other_quant_subcell(self, network, change, name, subcell):
|
||||
"""Convert subcell to ant subcell for cell except conv and dense."""
|
||||
is_other_subcell = True
|
||||
if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"):
|
||||
activation = self._convert_act(subcell)
|
||||
network.insert_child_to_cell(name, activation)
|
||||
change = True
|
||||
elif isinstance(subcell, nn.TensorAddQuant):
|
||||
self._convert_add(subcell)
|
||||
elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver):
|
||||
self._convert_observer(network, name, subcell)
|
||||
elif isinstance(subcell, _AddFakeQuantAfterSubCell):
|
||||
self._convert_fake_quant_after_cell(network, name, subcell)
|
||||
change = True
|
||||
else:
|
||||
is_other_subcell = False
|
||||
return is_other_subcell, network, change
|
||||
|
||||
def _convert_quant2deploy(self, network):
|
||||
"""Convert network's all quant subcell to deploy subcell."""
|
||||
cells = network.name_cells()
|
||||
change = False
|
||||
for name in cells:
|
||||
subcell = cells[name]
|
||||
if subcell == network:
|
||||
continue
|
||||
is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell)
|
||||
is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell)
|
||||
if not is_core_quant_subcell and not is_other_quant_subcell:
|
||||
self.upcell = None
|
||||
self._convert_quant2deploy(subcell)
|
||||
if isinstance(network, nn.SequentialCell) and change:
|
||||
network.cell_list = list(network.cells())
|
||||
return network
|
|
@ -1,28 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Quantization module, including base class of the quantizer, the quantization aware training algorithm,
|
||||
and quantization utils.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from .quantizer import OptimizeOption
|
||||
from .qat import QuantizationAwareTraining, create_quant_config
|
||||
from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers
|
||||
|
||||
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers", "QuantizationAwareTraining",
|
||||
"create_quant_config", "OptimizeOption"]
|
|
@ -1,634 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Quantization aware training
|
||||
|
||||
User can use quantization aware to train a model. MindSpore supports quantization aware training,
|
||||
which models quantization errors in both the forward and backward passes using fake-quantization
|
||||
operations. Note that the entire computation is carried out in floating point. At the end of quantization
|
||||
aware training, MindSpore provides conversion functions to convert the trained model into lower precision.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import re
|
||||
import numpy as np
|
||||
import mindspore.context as context
|
||||
from mindspore import log as logger
|
||||
from mindspore import nn, ops
|
||||
from mindspore._checkparam import Validator, Rel
|
||||
from mindspore.nn.layer import quant
|
||||
from mindspore.ops import functional as F
|
||||
from ..common import QuantDtype
|
||||
from .quantizer import Quantizer, OptimizeOption
|
||||
from .quant_utils import compute_kl_threshold
|
||||
|
||||
__all__ = ["QuantizationAwareTraining", "create_quant_config"]
|
||||
|
||||
|
||||
def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver),
                        quant_delay=(0, 0),
                        quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
                        per_channel=(False, False),
                        symmetric=(False, False),
                        narrow_range=(False, False),
                        mode="DEFAULT"):
    r"""
    Config the observer type of weights and data flow with quant parameters.

    Each pair-like argument supplies the weight setting first and the data-flow
    (activation) setting second. Currently only
    :class:`FakeQuantWithMinMaxObserver` is supported as an observer type.

    Args:
        quant_observer (Union[Observer, list, tuple]): Observer types for quantization;
            element 0 applies to weights, element 1 to data flow.
            Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver).
        quant_delay (Union[int, list, tuple]): Steps after which weights / activations
            are quantized during train and eval. Default: (0, 0).
        quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize
            weights / activations. Default: (QuantDtype.INT8, QuantDtype.INT8).
        per_channel (Union[bool, list, tuple]): Per-channel (True) vs per-layer (False)
            granularity for weights / data flow; the data-flow element must be `False`.
            Default: (False, False).
        symmetric (Union[bool, list, tuple]): Symmetric (True) vs asymmetric (False)
            quantization for weights / data flow. Default: (False, False).
        narrow_range (Union[bool, list, tuple]): Whether weights / data flow use
            narrow range. Default: (False, False).
        mode (str): Quantization mode; only `DEFAULT` (QAT) and `LEARNED_SCALE`
            are supported. Default: "DEFAULT".

    Returns:
        QuantConfig, contains the observer type of weight and activation.

    Raises:
        ValueError: If the second element of `per_channel` is not `False`.
    """
    # Per-channel activation quantization is not supported.
    if per_channel[-1]:
        raise ValueError("Arg 'per_channel' second element must be 'False'.")

    def _build_observer(idx):
        # Partially initialize one observer from slot `idx` of every pair.
        return quant_observer[idx].partial_init(quant_delay=quant_delay[idx],
                                                quant_dtype=quant_dtype[idx],
                                                per_channel=per_channel[idx],
                                                symmetric=symmetric[idx],
                                                narrow_range=narrow_range[idx],
                                                mode=mode)

    return quant.QuantConfig(weight=_build_observer(0), activation=_build_observer(-1))
|
||||
|
||||
|
||||
class _AddFakeQuantInput(nn.Cell):
    """
    Add FakeQuant OP at input of the network. Only support one input case.
    """

    def __init__(self, network, quant_delay=0):
        # auto_prefix=False: keep the wrapped network's parameter names unchanged.
        super(_AddFakeQuantInput, self).__init__(auto_prefix=False)
        # Fixed [-6, 6] initial range with EMA tracking for the input observer.
        self.fake_quant_input = quant.FakeQuantWithMinMaxObserver(min_init=-6, max_init=6,
                                                                  quant_delay=quant_delay, ema=True)
        self.fake_quant_input.update_parameters_name('fake_quant_input.')
        self.network = network

    def construct(self, data):
        # Fake-quantize the single input, then run the wrapped network.
        data = self.fake_quant_input(data)
        output = self.network(data)
        return output
|
||||
|
||||
|
||||
class _AddFakeQuantAfterSubCell(nn.Cell):
    """
    Add FakeQuant OP after of the sub Cell.
    """

    def __init__(self, subcell, **kwargs):
        # auto_prefix=False keeps the wrapped cell's parameter names intact.
        super(_AddFakeQuantAfterSubCell, self).__init__(auto_prefix=False)
        self.subcell = subcell

        # LEARNED_SCALE mode widens the initial observer range to [-16, 16];
        # the default (QAT) mode uses [-6, 6].
        opt = kwargs.get("optimize_option")
        learned_scale = opt is not None and OptimizeOption.LEARNED_SCALE in opt
        if learned_scale:
            self.mode = "LEARNED_SCALE"
            self.max_init = 16
            self.min_init = -16
        else:
            self.mode = "DEFAULT"
            self.max_init = 6
            self.min_init = -6

        self.fake_quant_act = quant.FakeQuantWithMinMaxObserver(
            min_init=self.min_init,
            max_init=self.max_init,
            ema=True,
            quant_dtype=kwargs.get("quant_dtype"),
            quant_delay=kwargs.get("quant_delay"),
            per_channel=kwargs.get("per_channel"),
            symmetric=kwargs.get("symmetric"),
            narrow_range=kwargs.get("narrow_range"),
            mode=self.mode)

    def construct(self, *data):
        # Run the wrapped cell, then fake-quantize its output.
        return self.fake_quant_act(self.subcell(*data))
|
||||
|
||||
|
||||
class QuantizationAwareTraining(Quantizer):
    r"""
    Quantizer for quantization aware training.

    Args:
        bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True.
        freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance.
            Default: 1e7.
        quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized
            during train and eval. The first element represents weights and the second element represents data flow.
            Default: (0, 0).
        quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first
            element represents weights and the second element represents data flow. It is necessary to consider the
            precision support of hardware devices in the practical quantization infer scenario.
            Default: (QuantDtype.INT8, QuantDtype.INT8).
        per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
            then base on per channel, otherwise base on per layer. The first element represents weights and the
            second element represents data flow, and the second element must be `False` now. Default: (False, False).
        symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
            base on symmetric, otherwise base on asymmetric. The first element represents weights and the second
            element represents data flow. Default: (False, False).
        narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
            The first element represents weights and the second element represents data flow.
            Default: (False, False).
        optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently
            only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured,
            `LEARNED_SCALE` has a higher priority. `LEARNED_SCALE` currently only work under some constraints, which
            includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators
            such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this
            scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT.
        one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Raises:
        TypeError: If the element of `quant_delay` or `freeze_bn` is not int.
        TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range`
            is not bool.
        TypeError: If the element of `quant_dtype` is not `QuantDtype`.
        ValueError: If the length of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is
            not less than 2.
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0.
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True).
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True).
        ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0).

    Examples:
        >>> from mindspore.compression.quant import QuantizationAwareTraining
        >>> from mindspore import nn
        >>> class LeNet5(nn.Cell):
        ...     def __init__(self, num_class=10, channel=1):
        ...         super(LeNet5, self).__init__()
        ...         self.type = "fusion"
        ...         self.num_class = num_class
        ...
        ...         # change `nn.Conv2d` to `nn.Conv2dBnAct`
        ...         self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
        ...         self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
        ...         # change `nn.Dense` to `nn.DenseBnAct`
        ...         self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
        ...         self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
        ...         self.fc3 = nn.DenseBnAct(84, self.num_class)
        ...
        ...         self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        ...         self.flatten = nn.Flatten()
        ...
        ...     def construct(self, x):
        ...         x = self.conv1(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.conv2(x)
        ...         x = self.max_pool2d(x)
        ...         x = self.flatten(x)
        ...         x = self.fc1(x)
        ...         x = self.fc2(x)
        ...         x = self.fc3(x)
        ...         return x
        ...
        >>> net = LeNet5()
        >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
        >>> net_qat = quantizer.quantize(net)
    """
    # Primitive op names that get a FakeQuant appended after them
    # (see _convert_subcells2quant).
    __quant_op_name = ["Add", "Sub", "Mul", "RealDiv", "ReduceMean"]

    def __init__(self,
                 bn_fold=True,
                 freeze_bn=10000000,
                 quant_delay=(0, 0),
                 quant_dtype=(QuantDtype.INT8, QuantDtype.INT8),
                 per_channel=(False, False),
                 symmetric=(False, False),
                 narrow_range=(False, False),
                 optimize_option=OptimizeOption.QAT,
                 one_conv_fold=True):
        """Init for QuantizationAwareTraining quantizer"""
        super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option)

        def convert2list(name, value):
            # Normalize scalar settings to (weight, activation) pair form.
            # NOTE(review): lengths 1 and 2 are accepted; the message text
            # ("less then 2") is misleading for len == 2.
            if not isinstance(value, list) and not isinstance(value, tuple):
                value = [value]
            elif len(value) > 2:
                raise ValueError("input `{}` len should less then 2".format(name))
            return value

        quant_delay_list = convert2list("quant delay", quant_delay)
        quant_dtype_list = convert2list("quant dtype", quant_dtype)
        per_channel_list = convert2list("per channel", per_channel)
        symmetric_list = convert2list("symmetric", symmetric)
        narrow_range_list = convert2list("narrow range", narrow_range)

        # Element [0] of each pair configures weights, element [-1] data flow
        # (activations); for length-1 inputs both read the same element.
        self.weight_qdelay = Validator.check_non_negative_int(quant_delay_list[0], "quant delay")
        self.act_qdelay = Validator.check_int(quant_delay_list[-1], 0, Rel.GE, "quant delay")
        self.bn_fold = Validator.check_bool(bn_fold, "bn fold")
        self.freeze_bn = Validator.check_non_negative_int(freeze_bn, "freeze bn")
        self.weight_dtype = Validator.check_isinstance("weights dtype", quant_dtype_list[0], QuantDtype)
        self.act_dtype = Validator.check_isinstance("activations dtype", quant_dtype_list[-1], QuantDtype)
        self.weight_channel = Validator.check_bool(per_channel_list[0], "per channel")
        self.act_channel = Validator.check_bool(per_channel_list[-1], "per channel")
        self.weight_symmetric = Validator.check_bool(symmetric_list[0], "symmetric")
        self.act_symmetric = Validator.check_bool(symmetric_list[-1], "symmetric")
        self.weight_range = Validator.check_bool(narrow_range_list[0], "narrow range")
        self.act_range = Validator.check_bool(narrow_range_list[-1], "narrow range")
        self.one_conv_fold = Validator.check_bool(one_conv_fold, "one conv fold")
        # Maps fusion-cell types to their conversion routines.
        self._convert_method_map = {nn.Conv2dBnAct: self._convert_conv,
                                    nn.DenseBnAct: self._convert_dense}
        self.mode = "DEFAULT"
        # LEARNED_SCALE imposes extra constraints on the configuration.
        if OptimizeOption.LEARNED_SCALE in self.optimize_option:
            self.mode = "LEARNED_SCALE"
            if not self.weight_symmetric or not self.act_symmetric:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support "
                                 "symmetric=(True, True) for quant")
            if not self.weight_range or not self.act_range:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support narrow_range=(True, True) "
                                 "for quant")
            if self.freeze_bn != 0:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support freeze_bn equal to 0, "
                                 "but get freeze_bn={}".format(self.freeze_bn))
            if self.weight_qdelay != 0 or self.act_qdelay != 0:
                raise ValueError("OptimizeOption.LEARNED_SCALE currently only support quant_delay=(0, 0)")
        self.quant_config = create_quant_config(quant_delay=quant_delay_list,
                                                quant_dtype=quant_dtype_list,
                                                per_channel=per_channel_list,
                                                symmetric=symmetric_list,
                                                narrow_range=narrow_range_list,
                                                mode=self.mode)
        # Numerical-stability epsilon used when folding BN statistics into weights.
        self.eps = 1e-5

    @staticmethod
    def _convert_op_name(name):
        """Convert a CamelCase op name to snake_case (e.g. 'RealDiv' -> 'real_div')."""
        pattern = re.compile(r'([A-Z]{1})')
        name_new = re.sub(pattern, r'_\1', name).lower()
        if name_new[0] == '_':
            name_new = name_new[1:]
        return name_new

    def quantize(self, network):
        """
        Quant API to convert input network to a quantization aware training network.

        Note:
            Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`.

        Args:
            network (Cell): network to be quantized.

        Returns:
            Cell, a quantization aware training network.

        Raises:
            KeyError: If the `device_target` set in context is not in `support_device`.
        """

        logger.warning("The compression module is deprecated and may not be supported in later version, please use "
                       "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.")
        support_device = ["Ascend", "GPU"]
        if context.get_context('device_target') not in support_device:
            raise KeyError("Unsupported {} device target.".format(context.get_context('device_target')))

        if OptimizeOption.QAT in self.optimize_option or OptimizeOption.LEARNED_SCALE in self.optimize_option:
            network.update_cell_prefix()
            network = self._convert_subcells2quant(network)
            network.update_cell_type("quant")
        return network

    def _convert_subcells2quant(self, network):
        """
        Convert sub cells like `Conv2dBnAct` and `DenseBnAct` to quant cells,
        recursing into any other child cell.
        """
        cells = network.name_cells()
        change = False
        for name in cells:
            subcell = cells[name]
            # name_cells() can yield the cell itself; skip to avoid self-recursion.
            if subcell == network:
                continue
            if isinstance(subcell, (nn.Conv2dBnAct, nn.DenseBnAct)):
                prefix = subcell.param_prefix
                new_subcell = self._convert_method_map[type(subcell)](subcell)
                new_subcell.update_parameters_name(prefix + '.')
                network.insert_child_to_cell(name, new_subcell)
                change = True
            else:
                self._convert_subcells2quant(subcell)
        # SequentialCell caches its children; refresh after replacement.
        if isinstance(network, nn.SequentialCell) and change:
            network.cell_list = list(network.cells())

        # add FakeQuant OP after OP in white list, but not including those wrapped in the below quantization cell.
        if isinstance(network, (nn.FakeQuantWithMinMaxObserver,
                                nn.Conv2dBnFoldQuantOneConv,
                                nn.Conv2dBnFoldQuant,
                                nn.Conv2dBnWithoutFoldQuant,
                                nn.Conv2dQuant,
                                nn.DenseQuant,
                                nn.ActQuant,
                                nn.TensorAddQuant,
                                nn.MulQuant)):
            return network

        # Collect bare Primitive attributes (Add/Sub/Mul/RealDiv/ReduceMean)
        # so their outputs can be fake-quantized too.
        add_list = []
        for name in network.__dict__:
            if name[0] == '_':
                continue
            attr = network.__dict__[name]
            if isinstance(attr, ops.Primitive) and attr.name in self.__quant_op_name:
                add_list.append((name, attr))
        for name, prim_op in add_list:
            prefix = name
            add_quant = _AddFakeQuantAfterSubCell(prim_op,
                                                  quant_dtype=self.act_dtype,
                                                  quant_delay=self.act_qdelay,
                                                  per_channel=self.act_channel,
                                                  symmetric=self.act_symmetric,
                                                  narrow_range=self.act_range,
                                                  optimize_option=self.optimize_option)
            if network.param_prefix:
                prefix = '.'.join([network.param_prefix, prefix])
            add_quant.update_parameters_name(prefix + '.')
            # Replace the raw primitive attribute with the wrapping cell.
            del network.__dict__[name]
            network.insert_child_to_cell(name, add_quant)
        return network

    def _convert_conv(self, subcell):
        """
        Convert a `Conv2dBnAct` cell to the matching quant cell, folding BN
        parameters in when configured.
        """
        min_init = -6
        max_init = 6
        if self.eps == 0:
            raise ValueError("`epsilon` is zero may lead to divide zero error")
        if OptimizeOption.LEARNED_SCALE in self.optimize_option:
            # Initialize the learned scale from a KL-based threshold over the
            # (BN-folded) weights.
            subcell_weight_para = subcell.conv.weight.data.asnumpy()
            if subcell.has_bn:
                scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
                                np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
            min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
            # quant_config is a namedtuple; _replace returns an updated copy.
            self.quant_config = self.quant_config._replace(
                weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))

        conv_inner = subcell.conv
        if subcell.has_bn:
            bn_inner = subcell.batchnorm
            if self.bn_fold:
                if self.one_conv_fold:
                    conv_inner = quant.Conv2dBnFoldQuantOneConv(conv_inner.in_channels,
                                                                conv_inner.out_channels,
                                                                kernel_size=conv_inner.kernel_size,
                                                                stride=conv_inner.stride,
                                                                pad_mode=conv_inner.pad_mode,
                                                                padding=conv_inner.padding,
                                                                dilation=conv_inner.dilation,
                                                                group=conv_inner.group,
                                                                eps=bn_inner.eps,
                                                                momentum=1 - bn_inner.momentum,
                                                                has_bias=conv_inner.has_bias,
                                                                bias_init=conv_inner.bias_init,
                                                                quant_config=self.quant_config,
                                                                quant_dtype=self.weight_dtype,
                                                                fake=True)
                else:
                    conv_inner = quant.Conv2dBnFoldQuant(conv_inner.in_channels,
                                                         conv_inner.out_channels,
                                                         kernel_size=conv_inner.kernel_size,
                                                         stride=conv_inner.stride,
                                                         pad_mode=conv_inner.pad_mode,
                                                         padding=conv_inner.padding,
                                                         dilation=conv_inner.dilation,
                                                         group=conv_inner.group,
                                                         eps=bn_inner.eps,
                                                         momentum=1 - bn_inner.momentum,
                                                         has_bias=conv_inner.has_bias,
                                                         bias_init=conv_inner.bias_init,
                                                         freeze_bn=self.freeze_bn,
                                                         quant_config=self.quant_config,
                                                         quant_dtype=self.weight_dtype,
                                                         fake=True)
                # change original network Batch Normalization OP parameters to quant network
                conv_inner.gamma = subcell.batchnorm.gamma
                conv_inner.beta = subcell.batchnorm.beta
                conv_inner.moving_mean = subcell.batchnorm.moving_mean
                conv_inner.moving_variance = subcell.batchnorm.moving_variance
            else:
                conv_inner = quant.Conv2dBnWithoutFoldQuant(conv_inner.in_channels,
                                                            conv_inner.out_channels,
                                                            kernel_size=conv_inner.kernel_size,
                                                            stride=conv_inner.stride,
                                                            pad_mode=conv_inner.pad_mode,
                                                            padding=conv_inner.padding,
                                                            dilation=conv_inner.dilation,
                                                            group=conv_inner.group,
                                                            eps=bn_inner.eps,
                                                            momentum=1 - bn_inner.momentum,
                                                            has_bias=conv_inner.has_bias,
                                                            bias_init=conv_inner.bias_init,
                                                            quant_config=self.quant_config)
                # change original network Batch Normalization OP parameters to quant network
                conv_inner.batchnorm.gamma = subcell.batchnorm.gamma
                conv_inner.batchnorm.beta = subcell.batchnorm.beta
                conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean
                conv_inner.batchnorm.moving_variance = subcell.batchnorm.moving_variance
            # The quant cell now owns the BN statistics; drop the original BN.
            del subcell.batchnorm
            subcell.batchnorm = None
            subcell.has_bn = False
        else:
            conv_inner = quant.Conv2dQuant(conv_inner.in_channels, conv_inner.out_channels,
                                           kernel_size=conv_inner.kernel_size, stride=conv_inner.stride,
                                           pad_mode=conv_inner.pad_mode, padding=conv_inner.padding,
                                           dilation=conv_inner.dilation, group=conv_inner.group,
                                           has_bias=conv_inner.has_bias, quant_config=self.quant_config,
                                           quant_dtype=self.weight_dtype)
        # change original network Conv2D OP parameters to quant network
        conv_inner.weight = subcell.conv.weight
        if subcell.conv.has_bias:
            conv_inner.bias = subcell.conv.bias
        subcell.conv = conv_inner
        if subcell.has_act and subcell.activation is not None:
            subcell.activation = self._convert_activation(subcell.activation)
        elif subcell.after_fake:
            # No activation: append a FakeQuant on the identity output instead.
            subcell.has_act = True
            subcell.activation = _AddFakeQuantAfterSubCell(F.identity, quant_dtype=self.act_dtype,
                                                           quant_delay=self.act_qdelay, per_channel=self.act_channel,
                                                           symmetric=self.act_symmetric, narrow_range=self.act_range,
                                                           optimize_option=self.optimize_option)
        return subcell

    def _convert_dense(self, subcell):
        """
        Convert a `DenseBnAct` cell to the matching quant cell.
        """
        min_init = -6
        max_init = 6
        if self.eps == 0:
            raise ValueError("`epsilon` is zero may lead to divide zero error")
        if OptimizeOption.LEARNED_SCALE in self.optimize_option:
            subcell_weight_para = subcell.dense.weight.data.asnumpy()
            if subcell.has_bn:
                scale_factor = (subcell.batchnorm.gamma.data.asnumpy() /
                                np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps))
                # NOTE(review): reshape(-1, 1, 1, 1) matches 4-D conv weights;
                # for a 2-D dense weight this broadcast looks suspect — confirm.
                subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
            min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype)
            self.quant_config = self.quant_config._replace(
                weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init))

        dense_inner = subcell.dense
        dense_inner = quant.DenseQuant(dense_inner.in_channels,
                                       dense_inner.out_channels,
                                       has_bias=dense_inner.has_bias,
                                       quant_config=self.quant_config,
                                       quant_dtype=self.weight_dtype)
        # change original network Dense OP parameters to quant network
        dense_inner.weight = subcell.dense.weight
        if subcell.dense.has_bias:
            dense_inner.bias = subcell.dense.bias
        subcell.dense = dense_inner
        if subcell.has_act and subcell.activation is not None:
            subcell.activation = self._convert_activation(subcell.activation)
        elif subcell.after_fake:
            # No activation: append a FakeQuant on the identity output instead.
            subcell.has_act = True
            subcell.activation = _AddFakeQuantAfterSubCell(F.identity,
                                                           quant_dtype=self.act_dtype,
                                                           quant_delay=self.act_qdelay,
                                                           per_channel=self.act_channel,
                                                           symmetric=self.act_symmetric,
                                                           narrow_range=self.act_range,
                                                           optimize_option=self.optimize_option)
        return subcell

    def _convert_activation(self, activation):
        """
        Convert an activation cell to the quant activation cell.
        """
        act_class = activation.__class__
        act_list = [nn.ReLU, nn.ReLU6, nn.Sigmoid]
        # These activations additionally fake-quantize their input.
        act_list_with_fake_before = [nn.LeakyReLU, nn.HSigmoid, nn.HSwish]

        if act_class in act_list:
            return quant.ActQuant(activation=activation,
                                  quant_config=self.quant_config,
                                  quant_dtype=self.act_dtype)
        if act_class in act_list_with_fake_before:
            return quant.ActQuant(activation=activation,
                                  ema=True,
                                  fake_before=True,
                                  quant_config=self.quant_config,
                                  quant_dtype=self.act_dtype)
        raise ValueError("Unsupported activation in auto quant: ", act_class)

    def _kl_init(self, subcell_weight_para, weight_dtype):
        """
        Calculate the value of max_init and min_init with compute_kl_threshold.

        Returns symmetric per-channel (or per-layer) init ranges as lists.
        """
        if self.weight_channel:
            max_init = [compute_kl_threshold(weight_para_each, weight_dtype)
                        for weight_para_each in subcell_weight_para]
            min_init = [-x for x in max_init]
        else:
            max_init = [compute_kl_threshold(subcell_weight_para, weight_dtype)]
            min_init = [-x for x in max_init]
        return min_init, max_init

    def _set_mixed_bits(self, network, strategy):
        r"""
        Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE`
        optimize_option.

        Args:
            network (Cell): Input network.
            strategy (list): The quantization strategy for layers that need to be quantified (eg. [[8], [8],
                ..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the
                convolution layer is supported.

        Returns:
            Cell, a network with mixed bit strategy configured.

        Raises:
            ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`.
        """
        if OptimizeOption.LEARNED_SCALE not in self.optimize_option:
            raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` "
                             "optimize_option.")

        # First pass: record the indices of quantizable layers.
        quantizable_idx = []
        pass_cell = None
        for i, cell_and_name in enumerate(network.cells_and_names()):
            cell = cell_and_name[1]
            if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)) and cell is not pass_cell:
                quantizable_idx.append(i)

        if len(quantizable_idx) != len(strategy):
            raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.")

        quantizable_layer_bit_dict = {idx: bit for idx, bit in zip(quantizable_idx, strategy)}
        # Map bit widths to the corresponding QuantDtype.
        type_map = {
            QuantDtype.INT2.num_bits: QuantDtype.INT2,
            QuantDtype.INT3.num_bits: QuantDtype.INT3,
            QuantDtype.INT4.num_bits: QuantDtype.INT4,
            QuantDtype.INT5.num_bits: QuantDtype.INT5,
            QuantDtype.INT6.num_bits: QuantDtype.INT6,
            QuantDtype.INT7.num_bits: QuantDtype.INT7,
            QuantDtype.INT8.num_bits: QuantDtype.INT8
        }
        if self.eps == 0:
            raise ValueError("`epsilon` is zero may lead to divide zero error")
        # Second pass: apply the per-layer bit widths and re-init the observers.
        for i, cell_and_name in enumerate(network.cells_and_names()):
            cell = cell_and_name[1]
            if i not in quantizable_idx:
                continue
            if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)):
                cell.weight_dtype = type_map.get(quantizable_layer_bit_dict[i][0])
                if cell.weight_dtype is None:
                    raise ValueError("Input strategy is invalid: ", quantizable_layer_bit_dict[i][0])
                if isinstance(cell, nn.Conv2dBnAct):
                    subcell_weight_para = cell.conv.weight.data.asnumpy()
                    # A 'gamma' attribute indicates BN stats folded into the conv.
                    if hasattr(cell.conv, 'gamma'):
                        scale_factor = (cell.conv.gamma.data.asnumpy() /
                                        np.sqrt(cell.conv.moving_variance.data.asnumpy() + self.eps))
                        subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
                    min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
                    cell.conv.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
                                                      min_init=min_init,
                                                      max_init=max_init)
                elif isinstance(cell, nn.DenseBnAct):
                    subcell_weight_para = cell.dense.weight.data.asnumpy()
                    if hasattr(cell.dense, 'gamma'):
                        scale_factor = (cell.dense.gamma.data.asnumpy() /
                                        np.sqrt(cell.dense.moving_variance.data.asnumpy() + self.eps))
                        subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
                    min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype)
                    cell.dense.fake_quant_weight.reset(quant_dtype=cell.weight_dtype,
                                                      min_init=min_init,
                                                      max_init=max_init)
        return network
|
|
@ -1,462 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Quantization utils.
|
||||
|
||||
Note: This is an experimental interface that is subject to change and/or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from mindspore._checkparam import Validator
|
||||
from mindspore import log as logger
|
||||
from ... import nn
|
||||
|
||||
__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"]
|
||||
|
||||
|
||||
def cal_quantization_params(input_min,
|
||||
input_max,
|
||||
quant_min,
|
||||
quant_max,
|
||||
data_type,
|
||||
symmetric=False):
|
||||
r"""
|
||||
Calculate quantization params for scale and zero point.
|
||||
|
||||
Args:
|
||||
input_min (numpy.ndarray): The dimension of channel or 1.
|
||||
input_max (numpy.ndarray): The dimension of channel or 1.
|
||||
quant_min (int): The minimum quantization integer.
|
||||
quant_max (int): The maximum quantization integer.
|
||||
data_type (numpy type) : Can be numpy int8, numpy uint8.
|
||||
symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False.
|
||||
|
||||
Returns:
|
||||
scale (numpy.ndarray): quantization param.
|
||||
zero point (numpy.ndarray): quantization param.
|
||||
"""
|
||||
if quant_min == quant_max:
|
||||
raise ValueError("quant_max is equal to quant_min which will lead to divide zero error.")
|
||||
|
||||
input_max = np.maximum(0.0, input_max)
|
||||
input_min = np.minimum(0.0, input_min)
|
||||
|
||||
if input_min.shape != input_max.shape:
|
||||
raise ValueError("input min shape should be equal to input max.")
|
||||
if len(input_min.shape) > 1:
|
||||
raise ValueError("input min and max shape should be one dim.")
|
||||
if (input_min > input_max).all():
|
||||
raise ValueError("input_min min should be less than input max.")
|
||||
if (input_max == input_min).all():
|
||||
return np.ones(input_min.shape), np.zeros(input_min.shape)
|
||||
|
||||
# calculate scale
|
||||
if symmetric:
|
||||
input_max = np.maximum(-input_min, input_max)
|
||||
input_min = -input_max
|
||||
scale = (input_max - input_min) / (quant_max - quant_min)
|
||||
|
||||
# calculate zero point
|
||||
if data_type == np.int8 and symmetric:
|
||||
zp = np.zeros(input_min.shape)
|
||||
else:
|
||||
if scale == 0.0:
|
||||
raise ValueError("scale can not be 0.")
|
||||
zp_double = quant_min - input_min / scale
|
||||
zp = np.floor(zp_double + 0.5)
|
||||
|
||||
return scale, zp
|
||||
|
||||
|
||||
def get_quant_min_max(data_type, num_bits=8, narrow_range=False):
|
||||
"""Calculate quantization params for minimum/maximum quantization integer"""
|
||||
if data_type == np.int8:
|
||||
quant_min = 0 - 2 ** (num_bits - 1)
|
||||
quant_max = 2 ** (num_bits - 1) - 1
|
||||
elif data_type == np.uint8:
|
||||
quant_min = 0
|
||||
quant_max = 2 ** num_bits - 1
|
||||
else:
|
||||
raise ValueError("Unsupported datatype({})".format(data_type))
|
||||
if narrow_range:
|
||||
quant_min = quant_min + 1
|
||||
return quant_min, quant_max
|
||||
|
||||
|
||||
def weight2int(data, scale, zero_point, quant_min, quant_max):
|
||||
r"""
|
||||
Calculate int8/uint8 weight from fp32. the formula is defined as:
|
||||
|
||||
.. math::
|
||||
int8/uint8 = round(float/scale) + offset
|
||||
|
||||
Args:
|
||||
data (numpy.ndarray): The dimension of channel or 1. Should be NCHW.
|
||||
scale (numpy.ndarray): The dimension of channel or 1.
|
||||
zero_point (numpy.ndarray): The dimension of channel or 1.
|
||||
quant_min (int): The minimum quantization integer.
|
||||
quant_max (int): The maximum quantization integer.
|
||||
|
||||
Returns:
|
||||
weight (numpy.ndarray): The dimension of channel or 1.
|
||||
"""
|
||||
if scale.shape != zero_point.shape:
|
||||
raise ValueError("`scale` and `zero_point` should have the same shape.")
|
||||
if scale.shape[0] < 0:
|
||||
raise ValueError("`scale` and `zero_point` shape should be greater than zero.")
|
||||
if 0 in scale:
|
||||
raise ValueError("Zero exist in `scale` which will lead to divide zero error.")
|
||||
if len(scale.shape) >= 1 and scale.shape[0] > 1:
|
||||
# for perchannel
|
||||
if scale.shape[0] == data.shape[0]:
|
||||
# `Conv2d` or `Dense` op weight
|
||||
shape_list = [-1] + [1] * len(data.shape[1:])
|
||||
scale = scale.reshape(shape_list)
|
||||
zero_point = zero_point.reshape(shape_list)
|
||||
elif scale.shape[0] == data.shape[1]:
|
||||
# `DepthwiseConv2d` op weight
|
||||
shape_list = [1, -1] + [1] * len(data.shape[2:])
|
||||
scale = scale.reshape(shape_list)
|
||||
zero_point = zero_point.reshape(shape_list)
|
||||
else:
|
||||
raise ValueError("Unsupported weight shape({})".format(data.shape))
|
||||
|
||||
weight_int = np.round((data / scale) + zero_point)
|
||||
weight_int[weight_int > quant_max] = quant_max
|
||||
weight_int[weight_int < quant_min] = quant_min
|
||||
return weight_int
|
||||
|
||||
|
||||
def scale_zp_max_min_from_fake_quant_cell(cell, data_type):
|
||||
"""Get calculate quantization params for scale, zero point, max and min from `FakeQuantWithMinMaxObserver`."""
|
||||
minq = cell.minq.data.asnumpy()
|
||||
maxq = cell.maxq.data.asnumpy()
|
||||
# make sure maxq > 0 and minq <= 0
|
||||
if cell.mode == 'LEARNED_SCALE':
|
||||
maxq = np.abs(maxq)
|
||||
minq = -np.abs(minq)
|
||||
quant_min, quant_max = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range)
|
||||
symmetric = cell.symmetric and not cell.neg_trunc
|
||||
scale, zp = cal_quantization_params(
|
||||
minq, maxq,
|
||||
quant_min, quant_max, data_type,
|
||||
symmetric=symmetric)
|
||||
return scale, zp, maxq, minq
|
||||
|
||||
|
||||
def fold_batchnorm(weight, cell_quant):
|
||||
r"""
|
||||
Fold the batchnorm in `Conv2dBnFoldQuant` to weight.
|
||||
|
||||
Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
|
||||
|
||||
Args:
|
||||
weight (numpy.ndarray): Weight of `cell_quant`.
|
||||
cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`.
|
||||
|
||||
Returns:
|
||||
weight (numpy.ndarray): Folded weight.
|
||||
bias (numpy.ndarray): Folded bias.
|
||||
"""
|
||||
variance = cell_quant.moving_variance.data.asnumpy()
|
||||
mean = cell_quant.moving_mean.data.asnumpy()
|
||||
gamma = cell_quant.gamma.data.asnumpy()
|
||||
beta = cell_quant.beta.data.asnumpy()
|
||||
epsilon = cell_quant.eps
|
||||
if epsilon == 0:
|
||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
||||
sigma = np.sqrt(variance + epsilon)
|
||||
|
||||
if gamma.shape[0] == weight.shape[0]:
|
||||
# `Conv2d` or `Dense` op weight
|
||||
shape_list = [-1] + [1] * len(weight.shape[1:])
|
||||
_gamma = gamma.reshape(shape_list)
|
||||
_sigma = sigma.reshape(shape_list)
|
||||
elif gamma.shape[0] == weight.shape[1]:
|
||||
# `DepthwiseConv2d` op weight
|
||||
shape_list = [1, -1] + [1] * len(weight.shape[2:])
|
||||
_gamma = gamma.reshape(shape_list)
|
||||
_sigma = sigma.reshape(shape_list)
|
||||
else:
|
||||
raise ValueError("Unsupported weight shape({})".format(weight.shape))
|
||||
|
||||
weight = weight * _gamma / _sigma
|
||||
bias = beta - gamma * mean / sigma
|
||||
return weight, bias
|
||||
|
||||
|
||||
def without_fold_batchnorm(weight, cell_quant):
|
||||
r"""
|
||||
Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight.
|
||||
|
||||
Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive.
|
||||
|
||||
Args:
|
||||
weight (numpy.ndarray): Weight of `cell_quant`.
|
||||
cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`.
|
||||
|
||||
Returns:
|
||||
weight (numpy.ndarray): whihout folded weight.
|
||||
bias (numpy.ndarray): without folded bias.
|
||||
"""
|
||||
variance = cell_quant.batchnorm.moving_variance.data.asnumpy()
|
||||
mean = cell_quant.batchnorm.moving_mean.data.asnumpy()
|
||||
gamma = cell_quant.batchnorm.gamma.data.asnumpy()
|
||||
beta = cell_quant.batchnorm.beta.data.asnumpy()
|
||||
epsilon = cell_quant.batchnorm.eps
|
||||
if epsilon == 0:
|
||||
raise ValueError("`epsilon` is zero may lead to divide zero error")
|
||||
sigma = np.sqrt(variance + epsilon)
|
||||
|
||||
if gamma.shape[0] == weight.shape[0]:
|
||||
# `Conv2d` or `Dense` op weight
|
||||
shape_list = [-1] + [1] * len(weight.shape[1:])
|
||||
_gamma = gamma.reshape(shape_list)
|
||||
_sigma = sigma.reshape(shape_list)
|
||||
elif gamma.shape[0] == weight.shape[1]:
|
||||
# `DepthwiseConv2d` op weight
|
||||
shape_list = [1, -1] + [1] * len(weight.shape[2:])
|
||||
_gamma = gamma.reshape(shape_list)
|
||||
_sigma = sigma.reshape(shape_list)
|
||||
else:
|
||||
raise ValueError("Unsupported weight shape({})".format(weight.shape))
|
||||
|
||||
weight = weight * _gamma / _sigma
|
||||
bias = beta - gamma * mean / sigma
|
||||
return weight, bias
|
||||
|
||||
|
||||
def compute_kl_threshold(data, bitwidth):
|
||||
r"""
|
||||
Using KL-J Distance to calculate the clip threshold.
|
||||
|
||||
Args:
|
||||
- **data** (NumpyArray) - Data observed to calculate the threshold for quantization,
|
||||
- **bitwidth** (QuantDtype) - The datatype of quantization.
|
||||
Outputs:
|
||||
Tensor with Shape 1. Threshold to calculate the data.
|
||||
"""
|
||||
data_max = np.abs(data).max()
|
||||
if data_max < 1e-5:
|
||||
return 1e-5
|
||||
hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(0, data_max), density=True)
|
||||
# For the sake of high efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode, If it exceeds the
|
||||
# largest size, turn to use the default bins config.
|
||||
largest_bin_size = 1024
|
||||
if hist.shape[0] > largest_bin_size:
|
||||
hist, bin_edges = np.histogram(np.abs(data), range=(0, data_max), density=True)
|
||||
sum_ = np.sum(hist)
|
||||
if sum_ == 0:
|
||||
hist = 0
|
||||
else:
|
||||
hist = hist / sum_
|
||||
cumsum = np.cumsum(hist)
|
||||
bit_pow_range = pow(2, int(bitwidth.num_bits) - 1)
|
||||
threshold = []
|
||||
scaling_factor = []
|
||||
kl = []
|
||||
if bit_pow_range + 1 > len(bin_edges) - 1:
|
||||
th_layer_out = bin_edges[-1]
|
||||
return float(th_layer_out)
|
||||
for i in range(bit_pow_range + 1, len(bin_edges), 1):
|
||||
threshold_tmp = (i + 0.5) * (bin_edges[1] - bin_edges[0])
|
||||
threshold = np.concatenate((threshold, [threshold_tmp]))
|
||||
scaling_factor_tmp = threshold_tmp / (bit_pow_range - 1)
|
||||
scaling_factor = np.concatenate((scaling_factor, [scaling_factor_tmp]))
|
||||
# forward interpolation
|
||||
cumsum_tmp = np.copy(cumsum)
|
||||
cumsum_tmp[(i - 1):] = 1
|
||||
fwd_x = np.linspace(0.0, 1.0, bit_pow_range)
|
||||
fwd_xp = np.linspace(0.0, 1.0, i)
|
||||
fwd_fp = cumsum_tmp[:i]
|
||||
forward_interp = np.interp(fwd_x, fwd_xp, fwd_fp)
|
||||
# backward interpolation
|
||||
bwd_x = np.linspace(0.0, 1.0, i)
|
||||
bwd_xp = np.linspace(0.0, 1.0, bit_pow_range)
|
||||
bwd_fp = forward_interp
|
||||
backward_interp = np.interp(bwd_x, bwd_xp, bwd_fp)
|
||||
cumsum_tmp[:i] = backward_interp
|
||||
if 0 in cumsum_tmp:
|
||||
raise ValueError("Zero exist in `cumsum_tmp` which will lead to divide zero error")
|
||||
kl_tmp = np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)) # Kullback-Leibler-J
|
||||
kl = np.concatenate((kl, [kl_tmp]))
|
||||
th_layer_out = threshold[np.argmin(kl)]
|
||||
threshold = float(th_layer_out)
|
||||
if threshold < 1e-5:
|
||||
threshold = 1e-5
|
||||
return threshold
|
||||
|
||||
|
||||
def query_quant_layers(network):
|
||||
r"""
|
||||
Query the network's quantization strategy of each quantized layer and print it to the screen, note that all the
|
||||
quantization layers are queried before graph compile optimization in the graph mode, thus, some redundant quantized
|
||||
layers, which not exist in practical execution, may appear.
|
||||
|
||||
Args:
|
||||
network (Cell): input network
|
||||
|
||||
Examples:
|
||||
>>> from mindspore.compression.quant import QuantizationAwareTraining
|
||||
>>> from mindspore.compression.quant.quant_utils import query_quant_layers
|
||||
>>> class LeNet5(nn.Cell):
|
||||
... def __init__(self, num_class=10, channel=1):
|
||||
... super(LeNet5, self).__init__()
|
||||
... self.type = "fusion"
|
||||
... self.num_class = num_class
|
||||
...
|
||||
... # change `nn.Conv2d` to `nn.Conv2dBnAct`
|
||||
... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
|
||||
... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
|
||||
... # change `nn.Dense` to `nn.DenseBnAct`
|
||||
... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
|
||||
... self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
|
||||
... self.fc3 = nn.DenseBnAct(84, self.num_class)
|
||||
...
|
||||
... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
|
||||
... self.flatten = nn.Flatten()
|
||||
...
|
||||
... def construct(self, x):
|
||||
... x = self.conv1(x)
|
||||
... x = self.max_pool2d(x)
|
||||
... x = self.conv2(x)
|
||||
... x = self.max_pool2d(x)
|
||||
... x = self.flatten(x)
|
||||
... x = self.fc1(x)
|
||||
... x = self.fc2(x)
|
||||
... x = self.fc3(x)
|
||||
... return x
|
||||
...
|
||||
>>> net = LeNet5()
|
||||
>>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False])
|
||||
>>> net_qat = quantizer.quantize(net)
|
||||
>>> query_quant_layers(net_qat)
|
||||
conv1.conv.fake_quant_weight INT8
|
||||
conv1.activation.fake_quant_act INT8
|
||||
conv2.conv.fake_quant_weight INT8
|
||||
conv2.activation.fake_quant_act INT8
|
||||
fc1.dense.fake_quant_weight INT8
|
||||
fc1.activation.fake_quant_act INT8
|
||||
fc2.dense.fake_quant_weight INT8
|
||||
fc2.activation.fake_quant_act INT8
|
||||
fc3.dense.fake_quant_weight INT8
|
||||
fc3.activation.fake_quant_act INT8
|
||||
"""
|
||||
network = Validator.check_isinstance("network", network, nn.Cell)
|
||||
tplt = "{0:60}\t{1:10}"
|
||||
for cell_and_name in network.cells_and_names():
|
||||
cell_name = cell_and_name[0]
|
||||
cell = cell_and_name[1]
|
||||
if isinstance(cell, nn.FakeQuantWithMinMaxObserver):
|
||||
logger.info(tplt.format(cell_name, cell.quant_dtype))
|
||||
|
||||
|
||||
def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None):
|
||||
r"""
|
||||
Load fp32 model parameters into quantization model.
|
||||
|
||||
Args:
|
||||
quant_model(Cell): Quantization model.
|
||||
params_dict(dict): Parameter dict that stores fp32 parameters.
|
||||
quant_new_params(list): Parameters that exist in quantization network but not in non-quantization
|
||||
network. Default: None.
|
||||
|
||||
Raises:
|
||||
TypeError: If `quant_new_params` is not None and is not list.
|
||||
ValueError: If there are parameters in the `quant_model` that are neither in `params_dict`
|
||||
nor in `quant_new_params`.
|
||||
|
||||
Examples:
|
||||
>>> import mindspore as ms
|
||||
>>> from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
|
||||
>>> class LeNet5(nn.Cell):
|
||||
... def __init__(self, num_class=10, channel=1):
|
||||
... super(LeNet5, self).__init__()
|
||||
... self.type = "fusion"
|
||||
... self.num_class = num_class
|
||||
...
|
||||
... # change `nn.Conv2d` to `nn.Conv2dBnAct`
|
||||
... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
|
||||
... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
|
||||
... # change `nn.Dense` to `nn.DenseBnAct`
|
||||
... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
|
||||
... self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
|
||||
... self.fc3 = nn.DenseBnAct(84, self.num_class)
|
||||
...
|
||||
... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
|
||||
... self.flatten = nn.Flatten()
|
||||
...
|
||||
... def construct(self, x):
|
||||
... x = self.conv1(x)
|
||||
... x = self.max_pool2d(x)
|
||||
... x = self.conv2(x)
|
||||
... x = self.max_pool2d(x)
|
||||
... x = self.flatten(x)
|
||||
... x = self.fc1(x)
|
||||
... x = self.fc2(x)
|
||||
... x = self.fc3(x)
|
||||
... return x
|
||||
...
|
||||
>>> net = LeNet5()
|
||||
>>> ckpt_file_name = "./checkpoint/LeNet5_noquant-1_32.ckpt"
|
||||
>>> param_dict = ms.load_checkpoint(ckpt_file_name)
|
||||
>>> load_nonquant_param_into_quant_net(net, param_dict)
|
||||
"""
|
||||
if quant_new_params is not None and not isinstance(quant_new_params, list):
|
||||
raise TypeError("quant_new_params must be list or None.")
|
||||
iterable_dict = {
|
||||
'minq': iter(list(filter(lambda item: item[0].endswith('minq'), params_dict.items()))),
|
||||
'maxq': iter(list(filter(lambda item: item[0].endswith('maxq'), params_dict.items()))),
|
||||
'quant_max': iter(list(filter(lambda item: item[0].endswith('quant_max'), params_dict.items())))
|
||||
}
|
||||
for param in params_dict.items():
|
||||
key_name = param[0].split(".")[-1]
|
||||
if key_name not in iterable_dict:
|
||||
iterable_dict[key_name] = iter(list(filter(lambda item, value=key_name: item[0].endswith(value),
|
||||
params_dict.items())))
|
||||
|
||||
for name, param in quant_model.parameters_and_names():
|
||||
key_name = name.split(".")[-1]
|
||||
if key_name not in iterable_dict.keys():
|
||||
if key_name not in quant_new_params:
|
||||
raise ValueError(f"Can't find match parameter in ckpt, param name = {name}")
|
||||
continue
|
||||
value_param = next(iterable_dict[key_name], None)
|
||||
if value_param:
|
||||
param.set_data(value_param[1].data)
|
||||
logger.info(f'init model param {name} with checkpoint param {value_param[0]}')
|
||||
|
||||
# Perform KL_init when learned scale quantization is executed.
|
||||
for cell_and_name in quant_model.cells_and_names():
|
||||
cell = cell_and_name[1]
|
||||
if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant,
|
||||
nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE":
|
||||
subcell_weight_para = cell.weight.data.asnumpy()
|
||||
if hasattr(cell, 'gamma'):
|
||||
scale_factor = (cell.gamma.data.asnumpy() /
|
||||
np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5))
|
||||
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1)
|
||||
|
||||
if cell.fake_quant_weight.per_channel:
|
||||
max_init = [compute_kl_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype)
|
||||
for weight_para_each in subcell_weight_para]
|
||||
min_init = [-x for x in max_init]
|
||||
else:
|
||||
max_init = [compute_kl_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)]
|
||||
min_init = [-x for x in max_init]
|
||||
|
||||
cell.fake_quant_weight.reset(quant_dtype=cell.fake_quant_weight.quant_dtype,
|
||||
min_init=min_init, max_init=max_init)
|
|
@ -1,68 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Note:
|
||||
Base Class of Quantizer. This is interface that is subject to change or deletion.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
|
||||
from mindspore._checkparam import Validator
|
||||
|
||||
__all__ = ["OptimizeOption"]
|
||||
|
||||
|
||||
class OptimizeOption(Enum):
|
||||
r"""
|
||||
An enum for the model quantization optimize option, currently only support `QAT` and `LEARNED_SCALE`.
|
||||
"""
|
||||
# using quantization aware training
|
||||
QAT = "QAT"
|
||||
|
||||
# using the learned scale quantization
|
||||
LEARNED_SCALE = "LEARNED_SCALE"
|
||||
|
||||
def __str__(self):
|
||||
return str(self.value)
|
||||
|
||||
|
||||
class Quantizer(ABC):
|
||||
"""
|
||||
Base class of Quantizer. You can implement different kind of quantizer to get different quantization result.
|
||||
|
||||
Notes:
|
||||
This class is an abstract class.
|
||||
|
||||
Args:
|
||||
optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options. Default:
|
||||
OptimizeOption.QAT.
|
||||
"""
|
||||
def __init__(self,
|
||||
optimize_option=OptimizeOption.QAT):
|
||||
if not isinstance(optimize_option, list) and not isinstance(optimize_option, tuple):
|
||||
optimize_option = [optimize_option]
|
||||
for option in optimize_option:
|
||||
option = Validator.check_isinstance("optimize_option", option, OptimizeOption)
|
||||
self.optimize_option = optimize_option
|
||||
|
||||
@abstractmethod
|
||||
def quantize(self, network):
|
||||
"""
|
||||
Quant API to convert input network to a quantization aware training network
|
||||
Args:
|
||||
network (Cell): network to be quantized.
|
||||
"""
|
|
@ -20,7 +20,7 @@ The high-level components(Cells) used to construct the neural network.
|
|||
from __future__ import absolute_import
|
||||
|
||||
from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \
|
||||
image, quant, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
|
||||
image, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense
|
||||
from mindspore.nn.layer.activation import *
|
||||
from mindspore.nn.layer.normalization import *
|
||||
from mindspore.nn.layer.container import *
|
||||
|
@ -32,7 +32,6 @@ from mindspore.nn.layer.basic import *
|
|||
from mindspore.nn.layer.embedding import *
|
||||
from mindspore.nn.layer.pooling import *
|
||||
from mindspore.nn.layer.image import *
|
||||
from mindspore.nn.layer.quant import *
|
||||
from mindspore.nn.layer.math import *
|
||||
from mindspore.nn.layer.combined import *
|
||||
from mindspore.nn.layer.timedistributed import *
|
||||
|
@ -53,7 +52,6 @@ __all__.extend(basic.__all__)
|
|||
__all__.extend(embedding.__all__)
|
||||
__all__.extend(pooling.__all__)
|
||||
__all__.extend(image.__all__)
|
||||
__all__.extend(quant.__all__)
|
||||
__all__.extend(math.__all__)
|
||||
__all__.extend(combined.__all__)
|
||||
__all__.extend(timedistributed.__all__)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,7 +25,6 @@ import stat
|
|||
import threading
|
||||
from threading import Thread, Lock
|
||||
from collections import defaultdict, OrderedDict
|
||||
from functools import wraps
|
||||
from io import BytesIO
|
||||
|
||||
import math
|
||||
|
@ -52,7 +51,6 @@ from mindspore.common.parameter import Parameter
|
|||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.common._utils import is_shape_unknown
|
||||
from mindspore.communication.management import get_rank, get_group_size
|
||||
from mindspore.compression.export import quant_export
|
||||
from mindspore.experimental import MapParameter
|
||||
from mindspore.parallel._cell_wrapper import get_allgather_cell
|
||||
from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index
|
||||
|
@ -1123,12 +1121,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
|
|||
|
||||
kwargs (dict): Configuration options dictionary.
|
||||
|
||||
- quant_mode (str): If the network is a quantization aware training network, the quant_mode should
|
||||
be set to "QUANT", else the quant_mode should be set to "NONQUANT".
|
||||
- mean (float): The mean of input data after preprocessing, used for quantizing the first layer of network.
|
||||
Default: 127.5.
|
||||
- std_dev (float): The variance of input data after preprocessing,
|
||||
used for quantizing the first layer of the network. Default: 127.5.
|
||||
- enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32.
|
||||
- enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set.
|
||||
|
||||
|
@ -1192,7 +1184,6 @@ def export(net, *inputs, file_name, file_format, **kwargs):
|
|||
inputs = tuple(inputs_col)
|
||||
|
||||
file_name = os.path.realpath(file_name)
|
||||
net = _quant_export(net, *inputs, file_format=file_format, **kwargs)
|
||||
if 'enc_key' in kwargs.keys():
|
||||
kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs)
|
||||
_export(net, file_name, file_format, *inputs, **kwargs)
|
||||
|
@ -1560,62 +1551,6 @@ def _save_dataset_to_mindir(model, dataset):
|
|||
model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False
|
||||
|
||||
|
||||
def quant_mode_manage(func):
|
||||
"""Inherit the quant_mode in old version."""
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(network, *inputs, file_format, **kwargs):
|
||||
if 'quant_mode' not in kwargs:
|
||||
return network
|
||||
quant_mode = kwargs.get('quant_mode')
|
||||
if not isinstance(quant_mode, str):
|
||||
raise TypeError("For 'export', the type of 'quant_mode' should be string, "
|
||||
"but got {}.".format(type(quant_mode)))
|
||||
if quant_mode in ('AUTO', 'MANUAL'):
|
||||
kwargs['quant_mode'] = 'QUANT'
|
||||
return func(network, *inputs, file_format=file_format, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
@quant_mode_manage
|
||||
def _quant_export(network, *inputs, file_format, **kwargs):
|
||||
"""Exports MindSpore quantization predict model to deploy with AIR and MINDIR."""
|
||||
supported_device = ["Ascend", "GPU"]
|
||||
supported_formats = ['AIR', 'MINDIR']
|
||||
quant_mode_formats = ['QUANT', 'NONQUANT']
|
||||
|
||||
quant_mode = kwargs['quant_mode']
|
||||
if quant_mode not in quant_mode_formats:
|
||||
raise KeyError(f"For 'export', the argument 'quant_mode' must be one of {quant_mode_formats}, "
|
||||
f"but got {quant_mode}.")
|
||||
if quant_mode == 'NONQUANT':
|
||||
return network
|
||||
quant_net = copy.deepcopy(network)
|
||||
quant_net._create_time = int(time.time() * 1e9)
|
||||
|
||||
mean = 127.5 if kwargs.get('mean', None) is None else kwargs.get('mean')
|
||||
std_dev = 127.5 if kwargs.get('std_dev', None) is None else kwargs.get('std_dev')
|
||||
mean = Validator.check_value_type("mean", mean, (int, float))
|
||||
std_dev = Validator.check_value_type("std_dev", std_dev, (int, float))
|
||||
|
||||
if context.get_context('device_target') not in supported_device:
|
||||
raise KeyError(f"For 'export', quant export only support {supported_device} device target now, "
|
||||
f"but got {context.get_context('device_target')}")
|
||||
|
||||
if file_format not in supported_formats:
|
||||
raise ValueError(f"For 'export', quant export only support 'file_format' {supported_formats}, "
|
||||
f"but got {file_format}.")
|
||||
|
||||
quant_net.set_train(False)
|
||||
if file_format == "MINDIR":
|
||||
exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs, is_mindir=True)
|
||||
else:
|
||||
exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs)
|
||||
deploy_net = exporter.run()
|
||||
return deploy_net
|
||||
|
||||
|
||||
def parse_print(print_file_name):
|
||||
"""
|
||||
Parse data file generated by mindspore.ops.Print.
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
network config setting, will be used in test_lenet_quant.py
|
||||
"""
|
||||
|
||||
from easydict import EasyDict as edict
|
||||
|
||||
quant_cfg = edict({
|
||||
'num_classes': 10,
|
||||
'lr': 0.01,
|
||||
'momentum': 0.9,
|
||||
'epoch_size': 10,
|
||||
'batch_size': 64,
|
||||
'buffer_size': 1000,
|
||||
'image_height': 32,
|
||||
'image_width': 32,
|
||||
'keep_checkpoint_max': 10,
|
||||
})
|
|
@ -1,60 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Produce the dataset
|
||||
"""
|
||||
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.vision as CV
|
||||
import mindspore.dataset.transforms as C
|
||||
from mindspore.dataset.vision import Inter
|
||||
from mindspore.common import dtype as mstype
|
||||
|
||||
|
||||
def create_dataset(data_path, batch_size=32, repeat_size=1,
|
||||
num_parallel_workers=1):
|
||||
"""
|
||||
create dataset for train or test
|
||||
"""
|
||||
# define dataset
|
||||
mnist_ds = ds.MnistDataset(data_path)
|
||||
|
||||
resize_height, resize_width = 32, 32
|
||||
rescale = 1.0 / 255.0
|
||||
shift = 0.0
|
||||
rescale_nml = 1 / 0.3081
|
||||
shift_nml = -1 * 0.1307 / 0.3081
|
||||
|
||||
# define map operations
|
||||
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode
|
||||
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
|
||||
rescale_op = CV.Rescale(rescale, shift)
|
||||
hwc2chw_op = CV.HWC2CHW()
|
||||
type_cast_op = C.TypeCast(mstype.int32)
|
||||
|
||||
# apply map operations on images
|
||||
mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
|
||||
mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||
mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||
mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||
mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
|
||||
|
||||
# apply DatasetOps
|
||||
buffer_size = 10000
|
||||
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script
|
||||
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
|
||||
mnist_ds = mnist_ds.repeat(repeat_size)
|
||||
|
||||
return mnist_ds
|
|
@ -1,58 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""LeNet."""
|
||||
import mindspore.nn as nn
|
||||
|
||||
|
||||
class LeNet5(nn.Cell):
    """
    Lenet network

    Fusion variant of LeNet-5: conv and dense layers are expressed with
    ``nn.Conv2dBnAct`` / ``nn.DenseBnAct`` (instead of plain ``nn.Conv2d`` /
    ``nn.Dense``) so the model can be converted by quantization-aware-training
    tooling.  NOTE(review): the attribute names (``conv1`` … ``fc3``) double as
    checkpoint parameter prefixes — do not rename them.

    Args:
        num_class (int): Num classes. Default: 10.
        channel (int): Number of input image channels. Default: 1.

    Returns:
        Tensor, output tensor

    Examples:
        >>> LeNet(num_class=10)

    """

    def __init__(self, num_class=10, channel=1):
        super(LeNet5, self).__init__()
        # "fusion" tag — presumably consumed by the quantization tooling to
        # identify fused networks; TODO confirm against the quantizer.
        self.type = "fusion"
        self.num_class = num_class

        # change `nn.Conv2d` to `nn.Conv2dBnAct`
        self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu')
        self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu')
        # change `nn.Dense` to `nn.DenseBnAct`
        self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu')
        self.fc2 = nn.DenseBnAct(120, 84, activation='relu')
        # final classifier layer: no activation (raw logits)
        self.fc3 = nn.DenseBnAct(84, self.num_class)

        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

    def construct(self, x):
        # conv -> pool -> conv -> pool -> flatten -> 3x dense (classic LeNet-5)
        x = self.conv1(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.max_pool2d(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
|
|
@ -1,199 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
train and infer lenet quantization network
|
||||
"""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore.common import dtype as mstype
|
||||
import mindspore.nn as nn
|
||||
from mindspore.train.metrics import Accuracy
|
||||
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
|
||||
from mindspore import load_checkpoint, load_param_into_net, export
|
||||
from mindspore.train import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore.compression.quant.quantizer import OptimizeOption
|
||||
from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net
|
||||
from dataset import create_dataset
|
||||
from config import quant_cfg
|
||||
from lenet_fusion import LeNet5 as LeNet5Fusion
|
||||
import numpy as np
|
||||
|
||||
data_path = "/home/workspace/mindspore_dataset/mnist"
|
||||
lenet_ckpt_path = "/home/workspace/mindspore_dataset/checkpoint/lenet/ckpt_lenet_noquant-10_1875.ckpt"
|
||||
|
||||
def train_lenet_quant(optim_option="QAT"):
    """
    Train the quantization-aware LeNet on MNIST.

    Loads the non-quantized LeNet checkpoint, converts the fusion network with
    ``QuantizationAwareTraining``, and trains it, saving checkpoints prefixed
    ``ckpt_lenet_quant<optim_option>``.

    Args:
        optim_option (str): Quantizer optimize option; "LEARNED_SCALE" selects
            the learned-scale quantizer configuration, any other value selects
            the default QAT configuration. Default: "QAT".
    """
    cfg = quant_cfg
    ckpt_path = lenet_ckpt_path
    ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1)
    step_size = ds_train.get_dataset_size()

    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)

    # load the float (non-quantized) checkpoint into the quantized network
    param_dict = load_checkpoint(ckpt_path)
    load_nonquant_param_into_quant_net(network, param_dict)

    # convert fusion network to quantization aware network
    if optim_option == "LEARNED_SCALE":
        # fix typo: 'otions' -> 'options' (local variable only)
        quant_optim_options = OptimizeOption.LEARNED_SCALE
        quantizer = QuantizationAwareTraining(bn_fold=False,
                                              per_channel=[True, False],
                                              symmetric=[True, True],
                                              narrow_range=[True, True],
                                              freeze_bn=0,
                                              quant_delay=0,
                                              one_conv_fold=True,
                                              optimize_option=quant_optim_options)
    else:
        # quant_delay=900: fake-quant is enabled only after 900 steps
        quantizer = QuantizationAwareTraining(quant_delay=900,
                                              bn_fold=False,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
    network = quantizer.quantize(network)

    # define network loss
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    # define network optimization
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

    # call back and monitor
    config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size,
                                   keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant"+optim_option, config=config_ckpt)

    # define model
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    print("============== Starting Training ==============")
    # use attribute access for consistency with every other cfg lookup
    # (was cfg['epoch_size'])
    model.train(cfg.epoch_size, ds_train, callbacks=[ckpt_callback, LossMonitor()],
                dataset_sink_mode=True)
    print("============== End Training ==============")
|
||||
|
||||
|
||||
def eval_quant(optim_option="QAT"):
    """
    Evaluate a trained quantization-aware LeNet checkpoint on the MNIST test
    split and assert accuracy > 0.98.

    Args:
        optim_option (str): Must match the option used in training — it is
            part of the checkpoint filename being loaded. Default: "QAT".

    Raises:
        ValueError: if some checkpoint parameters could not be loaded.
    """
    cfg = quant_cfg
    ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1)
    # checkpoint produced by train_lenet_quant (prefix + "-10_937.ckpt")
    ckpt_path = './ckpt_lenet_quant'+optim_option+'-10_937.ckpt'
    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)
    # convert fusion network to quantization aware network — the quantizer
    # configuration must mirror the one used at training time so parameter
    # names/shapes match the checkpoint.
    if optim_option == "LEARNED_SCALE":
        quant_optim_otions = OptimizeOption.LEARNED_SCALE
        quantizer = QuantizationAwareTraining(bn_fold=False,
                                              per_channel=[True, False],
                                              symmetric=[True, True],
                                              narrow_range=[True, True],
                                              freeze_bn=0,
                                              quant_delay=0,
                                              one_conv_fold=True,
                                              optimize_option=quant_optim_otions)
    else:
        quantizer = QuantizationAwareTraining(quant_delay=0,
                                              bn_fold=False,
                                              freeze_bn=10000,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
    network = quantizer.quantize(network)

    # define loss
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    # define network optimization (unused for eval metrics but required by Model)
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

    # call back and monitor
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    # load quantization aware network checkpoint; load_param_into_net returns
    # the parameters that could NOT be loaded — non-empty means failure.
    param_dict = load_checkpoint(ckpt_path)
    not_load_param = load_param_into_net(network, param_dict)
    if not_load_param:
        raise ValueError("Load param into net fail!")

    print("============== Starting Testing ==============")
    acc = model.eval(ds_eval, dataset_sink_mode=True)
    print("============== {} ==============".format(acc))
    assert acc['Accuracy'] > 0.98
|
||||
|
||||
|
||||
def export_lenet(optim_option="QAT", file_format="MINDIR"):
    """
    Export the quantization-aware LeNet as a serialized model file
    ("lenet_quant.<format>") using a dummy all-ones input.

    Args:
        optim_option (str): Quantizer option; "LEARNED_SCALE" or default QAT.
            Default: "QAT".
        file_format (str): Export format passed to ``export``. Default: "MINDIR".
    """
    cfg = quant_cfg
    # define fusion network
    network = LeNet5Fusion(cfg.num_classes)
    # convert fusion network to quantization aware network (same configs as
    # used by train/eval above)
    if optim_option == "LEARNED_SCALE":
        quant_optim_otions = OptimizeOption.LEARNED_SCALE
        quantizer = QuantizationAwareTraining(bn_fold=False,
                                              per_channel=[True, False],
                                              symmetric=[True, True],
                                              narrow_range=[True, True],
                                              freeze_bn=0,
                                              quant_delay=0,
                                              one_conv_fold=True,
                                              optimize_option=quant_optim_otions)
    else:
        quantizer = QuantizationAwareTraining(quant_delay=0,
                                              bn_fold=False,
                                              freeze_bn=10000,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
    network = quantizer.quantize(network)

    # export network with a dummy single-sample NCHW input tensor
    inputs = Tensor(np.ones([1, 1, cfg.image_height, cfg.image_width]), mstype.float32)
    export(network, inputs, file_name="lenet_quant", file_format=file_format, quant_mode='AUTO')
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_lenet_quant():
    """
    test_lenet_quant
    Features: LeNet quantization-aware training on GPU
    Description: train, eval and export in graph mode for both the default
        QAT option and the LEARNED_SCALE option.
    Expectation: eval accuracy > 0.98 (asserted inside eval_quant).
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    train_lenet_quant()
    eval_quant()
    export_lenet()
    train_lenet_quant(optim_option="LEARNED_SCALE")
    eval_quant(optim_option="LEARNED_SCALE")
    export_lenet(optim_option="LEARNED_SCALE")
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_lenet_quant_ascend():
    """
    test_lenet_quant_ascend
    Features: LeNet quantization-aware training on Ascend
    Description: train, eval and export (AIR format) in graph mode with the
        LEARNED_SCALE option only.
    Expectation: eval accuracy > 0.98 (asserted inside eval_quant).
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    train_lenet_quant(optim_option="LEARNED_SCALE")
    eval_quant(optim_option="LEARNED_SCALE")
    export_lenet(optim_option="LEARNED_SCALE", file_format="AIR")
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_lenet_quant_ascend_pynative():
    """
    test_lenet_quant_ascend_pynative
    Features: test_lenet_quant_ascend_pynative
    Description: test_lenet_quant_ascend_pynative pynative mode
    Expectation: None

    Only training is exercised here (no eval/export) — presumably a smoke
    test of the QAT path in PyNative mode.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
    train_lenet_quant(optim_option="QAT")
|
|
@ -1,67 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
""" create train dataset. """
|
||||
|
||||
from functools import partial
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.common.dtype as mstype
|
||||
import mindspore.dataset.vision as C
|
||||
import mindspore.dataset.transforms as C2
|
||||
|
||||
|
||||
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
    """
    create a train dataset

    Args:
        dataset_path(string): the path of dataset.
        config(EasyDict): the basic config for training.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    # Construct the dataset directly — the former functools.partial wrapper
    # was invoked immediately and added nothing.
    cifar_ds = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=False)

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0  # map uint8 pixels to [0, 1]
    shift = 0.0

    # define map operations
    # interpolation default BILINEAR
    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    # per-channel CIFAR-10 mean / std normalization
    normalize_op = C.Normalize(
        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)

    c_trans = [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    cifar_ds = cifar_ds.map(input_columns="label", operations=type_cast_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=c_trans)

    # apply batch operations; drop_remainder keeps a static batch dimension
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
|
|
@ -1,56 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""learning rate generator"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    generate learning rate array

    Linear warmup from ``lr_init`` to ``lr_max`` over ``warmup_epochs``,
    followed by cosine decay from ``lr_max`` down to ``lr_end``.

    Args:
        global_step(int): total steps of the training
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, learning rate array
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _lr_at(step):
        # Warmup phase: linear ramp lr_init -> lr_max.
        if step < warmup_steps:
            return lr_init + (lr_max - lr_init) * step / warmup_steps
        # Cosine decay phase, floored at zero.
        decayed = lr_end + \
            (lr_max - lr_end) * \
            (1. + math.cos(math.pi * (step - warmup_steps) /
                           (total_steps - warmup_steps))) / 2.
        return max(decayed, 0.0)

    schedule = np.array([_lr_at(step) for step in range(total_steps)]).astype(np.float32)
    # Slice off the steps already consumed before this run started.
    return schedule[global_step:]
|
|
@ -1,263 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""MobileNetV2 Quant model define"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore.nn as nn
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore import Tensor
|
||||
|
||||
__all__ = ['mobilenetV2']
|
||||
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
# Make sure that round down does not go down by more than 10%.
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
|
||||
class GlobalAvgPooling(nn.Cell):
    """
    Global avg pooling definition.

    Averages a feature map over its spatial axes (2, 3), collapsing an
    NCHW tensor to shape (N, C).

    Args:

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        # keep_dims=False drops the pooled H/W axes entirely
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        # reduce over the spatial dimensions (H=axis 2, W=axis 3)
        x = self.mean(x, (2, 3))
        return x
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Cell):
    """
    Convolution/Depthwise fused with Batchnorm and ReLU block definition.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Input kernel size. Default: 3.
        stride (int): Stride size for the first convolutional layer. Default: 1.
        groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        # "same"-style padding for odd kernel sizes
        padding = (kernel_size - 1) // 2
        # single fused cell so QAT tooling can fold conv+bn+relu together
        self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size,
                                   stride=stride,
                                   pad_mode='pad',
                                   padding=padding,
                                   group=groups,
                                   has_bn=True,
                                   activation='relu')

    def construct(self, x):
        x = self.conv(x)
        return x
|
||||
|
||||
|
||||
class InvertedResidual(nn.Cell):
    """
    Mobilenetv2 residual block definition.

    Expand (pointwise) -> depthwise -> project (pointwise-linear), with an
    identity skip connection when stride == 1 and channels are unchanged.

    Args:
        inp (int): Input channel.
        oup (int): Output channel.
        stride (int): Stride size for the first convolutional layer. Default: 1.
        expand_ratio (int): expand ration of input channel

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        # residual add only when spatial size and channel count are preserved
        self.use_res_connect = stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw expansion; skipped when expand_ratio == 1 (first block)
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim,
                       stride=stride, groups=hidden_dim),
            # pw-linear (no activation: linear bottleneck)
            nn.Conv2dBnAct(hidden_dim, oup, kernel_size=1, stride=1,
                           pad_mode='pad', padding=0, group=1, has_bn=True)
        ])
        self.conv = nn.SequentialCell(layers)
        self.add = P.Add()

    def construct(self, x):
        out = self.conv(x)
        if self.use_res_connect:
            out = self.add(out, x)
        return out
|
||||
|
||||
|
||||
class mobilenetV2(nn.Cell):
    """
    mobilenetV2 fusion architecture.

    Args:
        class_num (Cell): number of classes.
        width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1.
        has_dropout (bool): Is dropout used. Default is false
        inverted_residual_setting (list): Inverted residual settings. Default is None
        round_nearest (list): Channel round to . Default is 8
    Returns:
        Tensor, output tensor.

    Examples:
        >>> mobilenetV2(num_classes=1000)
    """

    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(mobilenetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # setting of inverted residual blocks
        self.cfgs = inverted_residual_setting
        if inverted_residual_setting is None:
            # default MobileNetV2 configuration:
            # t = expand ratio, c = output channels, n = block repeats, s = first stride
            self.cfgs = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # building first layer; channel counts scaled by width_mult and
        # rounded to a hardware-friendly multiple
        input_channel = _make_divisible(
            input_channel * width_mult, round_nearest)
        self.out_channels = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)

        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                # only the first block of each group downsamples
                stride = s if i == 0 else 1
                features.append(
                    block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(
            input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head: global-avg-pool then classifier, with optional dropout
        head = ([GlobalAvgPooling(),
                 nn.DenseBnAct(self.out_channels, num_classes,
                               has_bias=True, has_bn=False)
                 ] if not has_dropout else
                [GlobalAvgPooling(),
                 nn.Dropout(0.2),
                 nn.DenseBnAct(self.out_channels, num_classes,
                               has_bias=True, has_bn=False)
                 ])
        self.head = nn.SequentialCell(head)

        # init weights
        self.init_parameters_data()
        self._initialize_weights()

    def construct(self, x):
        x = self.features(x)
        x = self.head(x)
        return x

    def _initialize_weights(self):
        """
        Initialize weights.

        Conv weights get He-style normal init, BN gamma/beta get ones/zeros,
        dense weights get N(0, 0.01); all biases are zeroed.  NOTE(review):
        np.random.seed(1) inside the loop makes every layer draw the same
        random stream — presumably intentional for test reproducibility.

        Args:

        Returns:
            None.

        Examples:
            >>> _initialize_weights()
        """
        self.init_parameters_data()
        for _, m in self.cells_and_names():
            np.random.seed(1)
            if isinstance(m, nn.Conv2d):
                # He initialization: std = sqrt(2 / fan_out)
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n),
                                            m.weight.data.shape).astype("float32"))
                m.weight.set_data(w)
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.Conv2dBnAct):
                # fused cell: reach into the inner conv layer
                n = m.conv.kernel_size[0] * \
                    m.conv.kernel_size[1] * m.conv.out_channels
                w = Tensor(np.random.normal(0, np.sqrt(2. / n),
                                            m.conv.weight.data.shape).astype("float32"))
                m.conv.weight.set_data(w)
                if m.conv.bias is not None:
                    m.conv.bias.set_data(
                        Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_data(
                    Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
                m.beta.set_data(
                    Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_data(Tensor(np.random.normal(
                    0, 0.01, m.weight.data.shape).astype("float32")))
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.DenseBnAct):
                # fused cell: reach into the inner dense layer
                m.dense.weight.set_data(
                    Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32")))
                if m.dense.bias is not None:
                    m.dense.bias.set_data(
                        Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
|
|
@ -1,136 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Train Mobilenetv2_quant on Cifar10"""
|
||||
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore.common import set_seed
|
||||
|
||||
from dataset import create_dataset
|
||||
from lr_generator import get_lr
|
||||
from utils import Monitor, CrossEntropyWithLabelSmooth
|
||||
from mobilenetV2 import mobilenetV2
|
||||
|
||||
config_ascend_quant = ed({
|
||||
"num_classes": 10,
|
||||
"image_height": 224,
|
||||
"image_width": 224,
|
||||
"batch_size": 200,
|
||||
"step_threshold": 10,
|
||||
"data_load_mode": "mindata",
|
||||
"epoch_size": 1,
|
||||
"start_epoch": 200,
|
||||
"warmup_epochs": 1,
|
||||
"lr": 0.3,
|
||||
"momentum": 0.9,
|
||||
"weight_decay": 4e-5,
|
||||
"label_smooth": 0.1,
|
||||
"loss_scale": 1024,
|
||||
"save_checkpoint": True,
|
||||
"save_checkpoint_epochs": 1,
|
||||
"keep_checkpoint_max": 300,
|
||||
"save_checkpoint_path": "./checkpoint",
|
||||
})
|
||||
|
||||
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
|
||||
|
||||
|
||||
def train():
    """
    Train quantization-aware MobileNetV2 on CIFAR-10 (Ascend) and assert
    both the average step loss and the wall-clock step time stay under
    hard-coded regression thresholds.
    """
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    network = quantizer.quantize(network)

    # get learning rate; start_epoch offsets the schedule as if resuming
    # from epoch `start_epoch` of a longer run
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimization over trainable parameters only
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    # performance regression gate: per-step time must stay below threshold (ms)
    export_time_used = 650
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    assert train_time < export_time_used
    # convergence regression gate
    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    test_mobilenetv2_quant
    Features: test_mobilenetv2_quant
    Description: test_mobilenetv2_quant graph mode
    Expectation: None (loss/time assertions happen inside train()).
    """
    # fixed seed for reproducible loss-threshold comparison
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    train()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_mobilenetv2_quant()
|
|
@ -1,121 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Train Mobilenetv2_quant gpu on Cifar10"""
|
||||
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore.common import set_seed
|
||||
|
||||
from dataset import create_dataset
|
||||
from lr_generator import get_lr
|
||||
from utils import Monitor, CrossEntropyWithLabelSmooth
|
||||
from mobilenetV2 import mobilenetV2
|
||||
|
||||
config_ascend_quant = ed({
|
||||
"num_classes": 10,
|
||||
"image_height": 224,
|
||||
"image_width": 224,
|
||||
"batch_size": 300,
|
||||
"step_threshold": 10,
|
||||
"data_load_mode": "mindata",
|
||||
"epoch_size": 1,
|
||||
"start_epoch": 200,
|
||||
"warmup_epochs": 1,
|
||||
"lr": 0.05,
|
||||
"momentum": 0.997,
|
||||
"weight_decay": 4e-5,
|
||||
"label_smooth": 0.1,
|
||||
"loss_scale": 1024,
|
||||
"save_checkpoint": True,
|
||||
"save_checkpoint_epochs": 1,
|
||||
"keep_checkpoint_max": 300,
|
||||
"save_checkpoint_path": "./checkpoint",
|
||||
})
|
||||
|
||||
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
|
||||
|
||||
@pytest.mark.level2
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_mobilenetv2_quant():
    """
    Train quantization-aware MobileNetV2 on CIFAR-10 (GPU, graph mode) and
    assert the average step loss and step time stay under the regression
    thresholds.  Mirrors the Ascend variant but with symmetric=[False, False]
    and its own batch size / lr / momentum settings.
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[False, False])
    network = quantizer.quantize(network)

    # get learning rate; schedule offset as if resuming from start_epoch
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimization over trainable parameters only
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    # convergence regression gate
    expect_avg_step_loss = 2.32
    assert avg_step_loss < expect_avg_step_loss
    # performance regression gate: per-step time threshold (ms)
    export_time_used = 960
    assert train_time < export_time_used
||||
|
||||
if __name__ == '__main__':
|
||||
test_mobilenetv2_quant()
|
|
@ -1,120 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""MobileNetV2 utils"""
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
from mindspore.train.callback import Callback
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.nn.loss.loss import LossBase
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops import functional as F
|
||||
from mindspore.common import dtype as mstype
|
||||
|
||||
|
||||
class Monitor(Callback):
|
||||
"""
|
||||
Monitor loss and time.
|
||||
|
||||
Args:
|
||||
lr_init (numpy array): train lr
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Examples:
|
||||
>>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy())
|
||||
"""
|
||||
|
||||
def __init__(self, lr_init=None, step_threshold=10):
|
||||
super(Monitor, self).__init__()
|
||||
self.lr_init = lr_init
|
||||
self.lr_init_len = len(lr_init)
|
||||
self.step_threshold = step_threshold
|
||||
self.step_mseconds = 50000
|
||||
|
||||
def epoch_begin(self, run_context):
|
||||
self.losses = []
|
||||
self.epoch_time = time.time()
|
||||
|
||||
def epoch_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
|
||||
epoch_mseconds = (time.time() - self.epoch_time) * 1000
|
||||
per_step_mseconds = epoch_mseconds / cb_params.batch_num
|
||||
print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
|
||||
per_step_mseconds,
|
||||
np.mean(self.losses)))
|
||||
self.epoch_mseconds = epoch_mseconds
|
||||
|
||||
def step_begin(self, run_context):
|
||||
self.step_time = time.time()
|
||||
|
||||
def step_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
step_mseconds = (time.time() - self.step_time) * 1000
|
||||
self.step_mseconds = min(self.step_mseconds, step_mseconds)
|
||||
step_loss = cb_params.net_outputs
|
||||
|
||||
if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
|
||||
step_loss = step_loss[0]
|
||||
if isinstance(step_loss, Tensor):
|
||||
step_loss = np.mean(step_loss.asnumpy())
|
||||
|
||||
self.losses.append(step_loss)
|
||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
|
||||
|
||||
print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
|
||||
cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
|
||||
1, cb_params.batch_num, step_loss,
|
||||
np.mean(self.losses), self.step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))
|
||||
|
||||
if cb_params.cur_step_num == self.step_threshold:
|
||||
run_context.request_stop()
|
||||
|
||||
|
||||
class CrossEntropyWithLabelSmooth(LossBase):
|
||||
"""
|
||||
CrossEntropyWith LabelSmooth.
|
||||
|
||||
Args:
|
||||
smooth_factor (float): smooth factor, default=0.
|
||||
num_classes (int): num classes
|
||||
|
||||
Returns:
|
||||
None.
|
||||
|
||||
Examples:
|
||||
>>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
|
||||
"""
|
||||
|
||||
def __init__(self, smooth_factor=0., num_classes=1000):
|
||||
super(CrossEntropyWithLabelSmooth, self).__init__()
|
||||
self.onehot = P.OneHot()
|
||||
self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
|
||||
self.off_value = Tensor(1.0 * smooth_factor /
|
||||
(num_classes - 1), mstype.float32)
|
||||
self.ce = nn.SoftmaxCrossEntropyWithLogits()
|
||||
self.mean = P.ReduceMean(False)
|
||||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, logit, label):
|
||||
one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1],
|
||||
self.on_value, self.off_value)
|
||||
out_loss = self.ce(logit, one_hot_label)
|
||||
out_loss = self.mean(out_loss, 0)
|
||||
return out_loss
|
|
@ -22,20 +22,18 @@ from mindspore import nn
|
|||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore.common import set_seed
|
||||
from mindspore.compression.quant import create_quant_config
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self, qconfig):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1),
|
||||
pad_mode='valid', quant_config=qconfig)
|
||||
self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), pad_mode='valid')
|
||||
def construct(self, x):
|
||||
return self.conv(x)
|
||||
|
||||
|
||||
def test_conv2d_bn_fold_quant():
|
||||
set_seed(1)
|
||||
quant_config = create_quant_config()
|
||||
network = Net(quant_config)
|
||||
network = Net()
|
||||
inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32))
|
||||
label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32))
|
||||
opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9)
|
||||
|
@ -44,11 +42,13 @@ def test_conv2d_bn_fold_quant():
|
|||
train_network = nn.TrainOneStepCell(net_with_loss, opt)
|
||||
train_network.set_train()
|
||||
out_loss = train_network(inputs, label)
|
||||
print("------------------", out_loss.asnumpy())
|
||||
expect_loss = np.array([0.940427])
|
||||
error = np.array([0.1])
|
||||
diff = out_loss.asnumpy() - expect_loss
|
||||
assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
""" create train dataset. """
|
||||
|
||||
from functools import partial
|
||||
|
||||
import mindspore.common.dtype as mstype
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.transforms as C2
|
||||
import mindspore.dataset.vision as C
|
||||
|
||||
|
||||
def create_dataset(dataset_path, config, repeat_num=1, batch_size=32):
|
||||
"""
|
||||
create a train dataset
|
||||
|
||||
Args:
|
||||
dataset_path(string): the path of dataset.
|
||||
config(EasyDict):the basic config for training
|
||||
repeat_num(int): the repeat times of dataset. Default: 1.
|
||||
batch_size(int): the batch size of dataset. Default: 32.
|
||||
|
||||
Returns:
|
||||
dataset
|
||||
"""
|
||||
|
||||
load_func = partial(ds.Cifar10Dataset, dataset_path)
|
||||
data_set = load_func(num_parallel_workers=8, shuffle=False)
|
||||
|
||||
resize_height = config.image_height
|
||||
resize_width = config.image_width
|
||||
|
||||
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
|
||||
std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
|
||||
|
||||
# define map operations
|
||||
resize_op = C.Resize((resize_height, resize_width))
|
||||
normalize_op = C.Normalize(mean=mean, std=std)
|
||||
changeswap_op = C.HWC2CHW()
|
||||
c_trans = [resize_op, normalize_op, changeswap_op]
|
||||
|
||||
type_cast_op = C2.TypeCast(mstype.int32)
|
||||
|
||||
data_set = data_set.map(operations=c_trans, input_columns="image",
|
||||
num_parallel_workers=8)
|
||||
data_set = data_set.map(operations=type_cast_op,
|
||||
input_columns="label", num_parallel_workers=8)
|
||||
|
||||
# apply batch operations
|
||||
data_set = data_set.batch(batch_size, drop_remainder=True)
|
||||
|
||||
# apply dataset repeat operation
|
||||
data_set = data_set.repeat(repeat_num)
|
||||
|
||||
return data_set
|
|
@ -1,93 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""learning rate generator"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
|
||||
"""
|
||||
generate learning rate array
|
||||
|
||||
Args:
|
||||
lr_init(float): init learning rate
|
||||
lr_end(float): end learning rate
|
||||
lr_max(float): max learning rate
|
||||
warmup_epochs(int): number of warmup epochs
|
||||
total_epochs(int): total epoch of training
|
||||
steps_per_epoch(int): steps of one epoch
|
||||
lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default
|
||||
|
||||
Returns:
|
||||
np.array, learning rate array
|
||||
"""
|
||||
lr_each_step = []
|
||||
total_steps = steps_per_epoch * total_epochs
|
||||
warmup_steps = steps_per_epoch * warmup_epochs
|
||||
if lr_decay_mode == 'steps':
|
||||
decay_epoch_index = [0.3 * total_steps,
|
||||
0.6 * total_steps, 0.8 * total_steps]
|
||||
for i in range(total_steps):
|
||||
if i < decay_epoch_index[0]:
|
||||
lr = lr_max
|
||||
elif i < decay_epoch_index[1]:
|
||||
lr = lr_max * 0.1
|
||||
elif i < decay_epoch_index[2]:
|
||||
lr = lr_max * 0.01
|
||||
else:
|
||||
lr = lr_max * 0.001
|
||||
lr_each_step.append(lr)
|
||||
elif lr_decay_mode == 'poly':
|
||||
if warmup_steps != 0:
|
||||
inc_each_step = (float(lr_max) - float(lr_init)) / \
|
||||
float(warmup_steps)
|
||||
else:
|
||||
inc_each_step = 0
|
||||
for i in range(total_steps):
|
||||
if i < warmup_steps:
|
||||
lr = float(lr_init) + inc_each_step * float(i)
|
||||
else:
|
||||
base = (1.0 - (float(i) - float(warmup_steps)) /
|
||||
(float(total_steps) - float(warmup_steps)))
|
||||
lr = float(lr_max) * base * base
|
||||
if lr < 0.0:
|
||||
lr = 0.0
|
||||
lr_each_step.append(lr)
|
||||
elif lr_decay_mode == 'cosine':
|
||||
decay_steps = total_steps - warmup_steps
|
||||
for i in range(total_steps):
|
||||
if i < warmup_steps:
|
||||
lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
|
||||
lr = float(lr_init) + lr_inc * (i + 1)
|
||||
else:
|
||||
linear_decay = (total_steps - i) / decay_steps
|
||||
cosine_decay = 0.5 * \
|
||||
(1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps))
|
||||
decayed = linear_decay * cosine_decay + 0.00001
|
||||
lr = lr_max * decayed
|
||||
lr_each_step.append(lr)
|
||||
else:
|
||||
for i in range(total_steps):
|
||||
if i < warmup_steps:
|
||||
lr = lr_init + (lr_max - lr_init) * i / warmup_steps
|
||||
else:
|
||||
lr = lr_max - (lr_max - lr_end) * \
|
||||
(i - warmup_steps) / (total_steps - warmup_steps)
|
||||
lr_each_step.append(lr)
|
||||
|
||||
learning_rate = np.array(lr_each_step).astype(np.float32)
|
||||
|
||||
return learning_rate
|
|
@ -1,346 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""ResNet."""
|
||||
import numpy as np
|
||||
import mindspore.nn as nn
|
||||
import mindspore.common.initializer as weight_init
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore import Tensor
|
||||
from mindspore.nn import FakeQuantWithMinMaxObserver, Conv2dBnFoldQuant
|
||||
from mindspore.compression.quant import create_quant_config
|
||||
|
||||
_ema_decay = 0.999
|
||||
_symmetric = True
|
||||
_fake = True
|
||||
_per_channel = True
|
||||
_quant_config = create_quant_config(per_channel=(_per_channel, False), symmetric=(_symmetric, False))
|
||||
|
||||
|
||||
def _weight_variable(shape, factor=0.01):
|
||||
init_value = np.random.randn(*shape).astype(np.float32) * factor
|
||||
return Tensor(init_value)
|
||||
|
||||
|
||||
def _conv3x3(in_channel, out_channel, stride=1):
|
||||
weight_shape = (out_channel, in_channel, 3, 3)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Conv2d(in_channel, out_channel,
|
||||
kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
|
||||
|
||||
|
||||
def _conv1x1(in_channel, out_channel, stride=1):
|
||||
weight_shape = (out_channel, in_channel, 1, 1)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Conv2d(in_channel, out_channel,
|
||||
kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight)
|
||||
|
||||
|
||||
def _conv7x7(in_channel, out_channel, stride=1):
|
||||
weight_shape = (out_channel, in_channel, 7, 7)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Conv2d(in_channel, out_channel,
|
||||
kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight)
|
||||
|
||||
|
||||
def _bn(channel):
|
||||
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
|
||||
gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
|
||||
|
||||
|
||||
def _bn_last(channel):
|
||||
return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
|
||||
gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)
|
||||
|
||||
|
||||
def _fc(in_channel, out_channel):
|
||||
weight_shape = (out_channel, in_channel)
|
||||
weight = _weight_variable(weight_shape)
|
||||
return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0)
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Cell):
|
||||
"""
|
||||
Convolution/Depthwise fused with Batchnorm and ReLU block definition.
|
||||
|
||||
Args:
|
||||
in_planes (int): Input channel.
|
||||
out_planes (int): Output channel.
|
||||
kernel_size (int): Input kernel size.
|
||||
stride (int): Stride size for the first convolutional layer. Default: 1.
|
||||
groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1.
|
||||
|
||||
Returns:
|
||||
Tensor, output tensor.
|
||||
|
||||
Examples:
|
||||
>>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
|
||||
"""
|
||||
|
||||
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
|
||||
super(ConvBNReLU, self).__init__()
|
||||
padding = (kernel_size - 1) // 2
|
||||
conv = Conv2dBnFoldQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding,
|
||||
group=groups, fake=_fake, quant_config=_quant_config)
|
||||
layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()]
|
||||
self.features = nn.SequentialCell(layers)
|
||||
|
||||
def construct(self, x):
|
||||
output = self.features(x)
|
||||
return output
|
||||
|
||||
|
||||
class ResidualBlock(nn.Cell):
|
||||
"""
|
||||
ResNet V1 residual block definition.
|
||||
|
||||
Args:
|
||||
in_channel (int): Input channel.
|
||||
out_channel (int): Output channel.
|
||||
stride (int): Stride size for the first convolutional layer. Default: 1.
|
||||
|
||||
Returns:
|
||||
Tensor, output tensor.
|
||||
|
||||
Examples:
|
||||
>>> ResidualBlock(3, 256, stride=2)
|
||||
"""
|
||||
expansion = 4
|
||||
|
||||
def __init__(self,
|
||||
in_channel,
|
||||
out_channel,
|
||||
stride=1):
|
||||
super(ResidualBlock, self).__init__()
|
||||
|
||||
channel = out_channel // self.expansion
|
||||
self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1)
|
||||
self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride)
|
||||
self.conv3 = nn.SequentialCell([Conv2dBnFoldQuant(channel, out_channel, fake=_fake,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1, stride=1, pad_mode='same', padding=0),
|
||||
FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, symmetric=False)
|
||||
]) if _fake else Conv2dBnFoldQuant(channel, out_channel, fake=_fake,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1, stride=1,
|
||||
pad_mode='same', padding=0)
|
||||
|
||||
self.down_sample = False
|
||||
|
||||
if stride != 1 or in_channel != out_channel:
|
||||
self.down_sample = True
|
||||
self.down_sample_layer = None
|
||||
|
||||
if self.down_sample:
|
||||
self.down_sample_layer = nn.SequentialCell([Conv2dBnFoldQuant(in_channel, out_channel,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1, stride=stride,
|
||||
pad_mode='same', padding=0),
|
||||
FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay,
|
||||
symmetric=False)
|
||||
]) if _fake else Conv2dBnFoldQuant(in_channel, out_channel,
|
||||
fake=_fake,
|
||||
quant_config=_quant_config,
|
||||
kernel_size=1,
|
||||
stride=stride,
|
||||
pad_mode='same',
|
||||
padding=0)
|
||||
self.add = nn.TensorAddQuant()
|
||||
self.relu = P.ReLU()
|
||||
|
||||
def construct(self, x):
|
||||
identity = x
|
||||
out = self.conv1(x)
|
||||
out = self.conv2(out)
|
||||
out = self.conv3(out)
|
||||
|
||||
if self.down_sample:
|
||||
identity = self.down_sample_layer(identity)
|
||||
|
||||
out = self.add(out, identity)
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class ResNet(nn.Cell):
|
||||
"""
|
||||
ResNet architecture.
|
||||
|
||||
Args:
|
||||
block (Cell): Block for network.
|
||||
layer_nums (list): Numbers of block in different layers.
|
||||
in_channels (list): Input channel in each layer.
|
||||
out_channels (list): Output channel in each layer.
|
||||
strides (list): Stride size in each layer.
|
||||
num_classes (int): The number of classes that the training images are belonging to.
|
||||
Returns:
|
||||
Tensor, output tensor.
|
||||
|
||||
Examples:
|
||||
>>> ResNet(ResidualBlock,
|
||||
>>> [3, 4, 6, 3],
|
||||
>>> [64, 256, 512, 1024],
|
||||
>>> [256, 512, 1024, 2048],
|
||||
>>> [1, 2, 2, 2],
|
||||
>>> 10)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
block,
|
||||
layer_nums,
|
||||
in_channels,
|
||||
out_channels,
|
||||
strides,
|
||||
num_classes):
|
||||
super(ResNet, self).__init__()
|
||||
|
||||
if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
|
||||
raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
|
||||
|
||||
self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
|
||||
|
||||
self.layer1 = self._make_layer(block,
|
||||
layer_nums[0],
|
||||
in_channel=in_channels[0],
|
||||
out_channel=out_channels[0],
|
||||
stride=strides[0])
|
||||
self.layer2 = self._make_layer(block,
|
||||
layer_nums[1],
|
||||
in_channel=in_channels[1],
|
||||
out_channel=out_channels[1],
|
||||
stride=strides[1])
|
||||
self.layer3 = self._make_layer(block,
|
||||
layer_nums[2],
|
||||
in_channel=in_channels[2],
|
||||
out_channel=out_channels[2],
|
||||
stride=strides[2])
|
||||
self.layer4 = self._make_layer(block,
|
||||
layer_nums[3],
|
||||
in_channel=in_channels[3],
|
||||
out_channel=out_channels[3],
|
||||
stride=strides[3])
|
||||
|
||||
self.mean = P.ReduceMean(keep_dims=True)
|
||||
self.flatten = nn.Flatten()
|
||||
self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, quant_config=_quant_config)
|
||||
self.output_fake = nn.FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay)
|
||||
|
||||
# init weights
|
||||
self._initialize_weights()
|
||||
|
||||
def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
|
||||
"""
|
||||
Make stage network of ResNet.
|
||||
|
||||
Args:
|
||||
block (Cell): Resnet block.
|
||||
layer_num (int): Layer number.
|
||||
in_channel (int): Input channel.
|
||||
out_channel (int): Output channel.
|
||||
stride (int): Stride size for the first convolutional layer.
|
||||
|
||||
Returns:
|
||||
SequentialCell, the output layer.
|
||||
|
||||
Examples:
|
||||
>>> _make_layer(ResidualBlock, 3, 128, 256, 2)
|
||||
"""
|
||||
layers = []
|
||||
|
||||
resnet_block = block(in_channel, out_channel, stride=stride)
|
||||
layers.append(resnet_block)
|
||||
|
||||
for _ in range(1, layer_num):
|
||||
resnet_block = block(out_channel, out_channel, stride=1)
|
||||
layers.append(resnet_block)
|
||||
|
||||
return nn.SequentialCell(layers)
|
||||
|
||||
def construct(self, x):
|
||||
x = self.conv1(x)
|
||||
c1 = self.maxpool(x)
|
||||
|
||||
c2 = self.layer1(c1)
|
||||
c3 = self.layer2(c2)
|
||||
c4 = self.layer3(c3)
|
||||
c5 = self.layer4(c4)
|
||||
|
||||
out = self.mean(c5, (2, 3))
|
||||
out = self.flatten(out)
|
||||
out = self.end_point(out)
|
||||
out = self.output_fake(out)
|
||||
return out
|
||||
|
||||
def _initialize_weights(self):
|
||||
|
||||
self.init_parameters_data()
|
||||
for _, m in self.cells_and_names():
|
||||
np.random.seed(1)
|
||||
|
||||
if isinstance(m, nn.Conv2dBnFoldQuant):
|
||||
m.weight.set_data(weight_init.initializer(weight_init.Normal(),
|
||||
m.weight.shape,
|
||||
m.weight.dtype))
|
||||
elif isinstance(m, nn.DenseQuant):
|
||||
m.weight.set_data(weight_init.initializer(weight_init.Normal(),
|
||||
m.weight.shape,
|
||||
m.weight.dtype))
|
||||
elif isinstance(m, nn.Conv2dBnWithoutFoldQuant):
|
||||
m.weight.set_data(weight_init.initializer(weight_init.Normal(),
|
||||
m.weight.shape,
|
||||
m.weight.dtype))
|
||||
|
||||
|
||||
def resnet50_quant(class_num=10):
|
||||
"""
|
||||
Get ResNet50 neural network.
|
||||
|
||||
Args:
|
||||
class_num (int): Class number.
|
||||
|
||||
Returns:
|
||||
Cell, cell instance of ResNet50 neural network.
|
||||
|
||||
Examples:
|
||||
>>> net = resnet50_quant(10)
|
||||
"""
|
||||
return ResNet(ResidualBlock,
|
||||
[3, 4, 6, 3],
|
||||
[64, 256, 512, 1024],
|
||||
[256, 512, 1024, 2048],
|
||||
[1, 2, 2, 2],
|
||||
class_num)
|
||||
|
||||
|
||||
def resnet101_quant(class_num=1001):
|
||||
"""
|
||||
Get ResNet101 neural network.
|
||||
|
||||
Args:
|
||||
class_num (int): Class number.
|
||||
|
||||
Returns:
|
||||
Cell, cell instance of ResNet101 neural network.
|
||||
|
||||
Examples:
|
||||
>>> net = resnet101(1001)
|
||||
"""
|
||||
return ResNet(ResidualBlock,
|
||||
[3, 4, 23, 3],
|
||||
[64, 256, 512, 1024],
|
||||
[256, 512, 1024, 2048],
|
||||
[1, 2, 2, 2],
|
||||
class_num)
|
|
@ -1,131 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Train Resnet50_quant on Cifar10"""
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from easydict import EasyDict as ed
|
||||
|
||||
from mindspore import context
|
||||
from mindspore import Tensor
|
||||
from mindspore.nn.optim.momentum import Momentum
|
||||
from mindspore.train.model import Model
|
||||
from mindspore.compression.quant import QuantizationAwareTraining
|
||||
from mindspore import set_seed
|
||||
|
||||
from resnet_quant_manual import resnet50_quant
|
||||
from dataset import create_dataset
|
||||
from lr_generator import get_lr
|
||||
from utils import Monitor, CrossEntropy
|
||||
|
||||
|
||||
config_quant = ed({
|
||||
"class_num": 10,
|
||||
"batch_size": 128,
|
||||
"step_threshold": 20,
|
||||
"loss_scale": 1024,
|
||||
"momentum": 0.9,
|
||||
"weight_decay": 1e-4,
|
||||
"epoch_size": 1,
|
||||
"pretrained_epoch_size": 90,
|
||||
"buffer_size": 1000,
|
||||
"image_height": 224,
|
||||
"image_width": 224,
|
||||
"data_load_mode": "original",
|
||||
"save_checkpoint": True,
|
||||
"save_checkpoint_epochs": 1,
|
||||
"keep_checkpoint_max": 50,
|
||||
"save_checkpoint_path": "./",
|
||||
"warmup_epochs": 0,
|
||||
"lr_decay_mode": "cosine",
|
||||
"use_label_smooth": True,
|
||||
"label_smooth_factor": 0.1,
|
||||
"lr_init": 0,
|
||||
"lr_max": 0.005,
|
||||
})
|
||||
|
||||
dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/"
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_resnet50_quant():
|
||||
set_seed(1)
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
|
||||
config = config_quant
|
||||
print("training configure: {}".format(config))
|
||||
epoch_size = config.epoch_size
|
||||
|
||||
# define network
|
||||
net = resnet50_quant(class_num=config.class_num)
|
||||
net.set_train(True)
|
||||
|
||||
# define loss
|
||||
if not config.use_label_smooth:
|
||||
config.label_smooth_factor = 0.0
|
||||
loss = CrossEntropy(
|
||||
smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
|
||||
#loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
|
||||
|
||||
# define dataset
|
||||
dataset = create_dataset(dataset_path=dataset_path,
|
||||
config=config,
|
||||
repeat_num=1,
|
||||
batch_size=config.batch_size)
|
||||
step_size = dataset.get_dataset_size()
|
||||
|
||||
# convert fusion network to quantization aware network
|
||||
quantizer = QuantizationAwareTraining(bn_fold=True,
|
||||
per_channel=[True, False],
|
||||
symmetric=[True, False])
|
||||
net = quantizer.quantize(net)
|
||||
|
||||
# get learning rate
|
||||
lr = Tensor(get_lr(lr_init=config.lr_init,
|
||||
lr_end=0.0,
|
||||
lr_max=config.lr_max,
|
||||
warmup_epochs=config.warmup_epochs,
|
||||
total_epochs=config.epoch_size,
|
||||
steps_per_epoch=step_size,
|
||||
lr_decay_mode='cosine'))
|
||||
|
||||
# define optimization
|
||||
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
|
||||
config.weight_decay, config.loss_scale)
|
||||
|
||||
# define model
|
||||
#model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
|
||||
model = Model(net, loss_fn=loss, optimizer=opt)
|
||||
|
||||
print("============== Starting Training ==============")
|
||||
monitor = Monitor(lr_init=lr.asnumpy(),
|
||||
step_threshold=config.step_threshold)
|
||||
|
||||
callbacks = [monitor]
|
||||
model.train(epoch_size, dataset, callbacks=callbacks,
|
||||
dataset_sink_mode=False)
|
||||
print("============== End Training ==============")
|
||||
|
||||
expect_avg_step_loss = 2.60
|
||||
avg_step_loss = np.mean(np.array(monitor.losses))
|
||||
|
||||
print("average step loss:{}".format(avg_step_loss))
|
||||
assert avg_step_loss < expect_avg_step_loss
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_resnet50_quant()
|
|
@ -1,105 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Resnet50 utils"""
|
||||
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
from mindspore.train.callback import Callback
|
||||
from mindspore import Tensor
|
||||
from mindspore import nn
|
||||
from mindspore.nn.loss.loss import LossBase
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops import functional as F
|
||||
from mindspore.common import dtype as mstype
|
||||
|
||||
|
||||
class Monitor(Callback):
|
||||
"""
|
||||
Monitor loss and time.
|
||||
|
||||
Args:
|
||||
lr_init (numpy array): train lr
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Examples:
|
||||
>>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy())
|
||||
"""
|
||||
|
||||
def __init__(self, lr_init=None, step_threshold=10):
|
||||
super(Monitor, self).__init__()
|
||||
self.lr_init = lr_init
|
||||
self.lr_init_len = len(lr_init)
|
||||
self.step_threshold = step_threshold
|
||||
|
||||
def epoch_begin(self, run_context):
|
||||
self.losses = []
|
||||
self.epoch_time = time.time()
|
||||
|
||||
def epoch_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
|
||||
epoch_mseconds = (time.time() - self.epoch_time) * 1000
|
||||
per_step_mseconds = epoch_mseconds / cb_params.batch_num
|
||||
print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds,
|
||||
per_step_mseconds,
|
||||
np.mean(self.losses)))
|
||||
self.epoch_mseconds = epoch_mseconds
|
||||
|
||||
def step_begin(self, run_context):
|
||||
self.step_time = time.time()
|
||||
|
||||
def step_end(self, run_context):
|
||||
cb_params = run_context.original_args()
|
||||
step_mseconds = (time.time() - self.step_time) * 1000
|
||||
step_loss = cb_params.net_outputs
|
||||
|
||||
if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
|
||||
step_loss = step_loss[0]
|
||||
if isinstance(step_loss, Tensor):
|
||||
step_loss = np.mean(step_loss.asnumpy())
|
||||
|
||||
self.losses.append(step_loss)
|
||||
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
|
||||
|
||||
print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
|
||||
cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch +
|
||||
1, cb_params.batch_num, step_loss,
|
||||
np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))
|
||||
|
||||
if cb_params.cur_step_num == self.step_threshold:
|
||||
run_context.request_stop()
|
||||
|
||||
|
||||
class CrossEntropy(LossBase):
|
||||
"""the redefined loss function with SoftmaxCrossEntropyWithLogits"""
|
||||
|
||||
def __init__(self, smooth_factor=0, num_classes=1001):
|
||||
super(CrossEntropy, self).__init__()
|
||||
self.onehot = P.OneHot()
|
||||
self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
|
||||
self.off_value = Tensor(1.0 * smooth_factor /
|
||||
(num_classes - 1), mstype.float32)
|
||||
self.ce = nn.SoftmaxCrossEntropyWithLogits()
|
||||
self.mean = P.ReduceMean(False)
|
||||
|
||||
def construct(self, logit, label):
|
||||
one_hot_label = self.onehot(label, F.shape(
|
||||
logit)[1], self.on_value, self.off_value)
|
||||
loss = self.ce(logit, one_hot_label)
|
||||
loss = self.mean(loss, 0)
|
||||
return loss
|
Loading…
Reference in New Issue