diff --git a/cmake/package.cmake b/cmake/package.cmake index 84fc18b9246..27b140771d2 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -289,7 +289,6 @@ install( ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler - ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental diff --git a/cmake/package_mac.cmake b/cmake/package_mac.cmake index 7c5550b45ff..2effa7579a3 100644 --- a/cmake/package_mac.cmake +++ b/cmake/package_mac.cmake @@ -164,7 +164,6 @@ install( ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler - ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental diff --git a/cmake/package_win.cmake b/cmake/package_win.cmake index b71bb2372d7..7874a35f69f 100644 --- a/cmake/package_win.cmake +++ b/cmake/package_win.cmake @@ -250,7 +250,6 @@ install( ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/ops ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/communication ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/profiler - ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/compression ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/rewrite ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/run_check ${CMAKE_SOURCE_DIR}/mindspore/python/mindspore/experimental diff --git a/docs/api/api_python/mindspore/mindspore.export.rst b/docs/api/api_python/mindspore/mindspore.export.rst index 6eeeecf4d80..ac9da4a3783 100644 --- a/docs/api/api_python/mindspore/mindspore.export.rst +++ b/docs/api/api_python/mindspore/mindspore.export.rst @@ -23,9 +23,6 @@ mindspore.export - **kwargs** (dict) - 配置选项字典。 - - **quant_mode** (str) - 如果网络是量化感知训练网络,那么 `quant_mode` 需要设置为"QUANT",否则 `quant_mode` 需要设置为"NONQUANT"。 - - **mean** (float) - 预处理后输入数据的平均值,用于量化网络的第一层。默认值:127.5。 - - **std_dev** (float) - 预处理后输入数据的方差,用于量化网络的第一层。默认值:127.5。 - **enc_key** (str) - 用于加密的字节类型密钥,有效长度为16、24或者32。 - **enc_mode** (Union[str, function]) - 指定加密模式,当设置 `enc_key` 时启用。 diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc index a5e7b6cae80..b1dcdce0572 100644 --- a/mindspore/ccsrc/pipeline/jit/init.cc +++ b/mindspore/ccsrc/pipeline/jit/init.cc @@ -168,8 +168,6 @@ PYBIND11_MODULE(_c_expression, m) { "Get the number of parallel operators.") .def("get_allreduce_fusion", &GraphExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"), "Get Allreduce Fusion Dictionary.") - .def("fetch_info_for_quant_export", &GraphExecutorPy::FetchInfoForQuantExport, py::arg("phase") = py::str("train"), - "Fetch the inputs of Conv or Matmul for quant export.") .def("build_data_graph", &GraphExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"), "Build data graph.") .def("export_graph", &GraphExecutorPy::ExportGraph, py::arg("file_name"), py::arg("phase"), diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc index 8eb18f12704..b6f1e5f1e1a 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -631,122 +631,6 @@ 
 GraphExecutorPy::~GraphExecutorPy() { ConfigManager::GetInstance().ResetConfig(); }
 
-void GraphExecutorPy::GetWeightInfo(
-    const CNodePtr &root_node, const AnfNodePtr &weight_node,
-    std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const {
-  MS_EXCEPTION_IF_NULL(root_node);
-  MS_EXCEPTION_IF_NULL(fake_quant_table);
-  std::string weight_name;
-  auto x = root_node->input(1);
-  MS_EXCEPTION_IF_NULL(x);
-  if (IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
-    weight_name = weight_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
-  } else {
-    auto para = weight_node->cast_ptr<Parameter>();
-    MS_EXCEPTION_IF_NULL(para);
-    weight_name = para->name();
-  }
-  // find the fakequant from input
-  int64_t count = 0;
-  const int64_t max_depth = 5;
-  auto is_quant_cnode = [](const AnfNodePtr &node) {
-    return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
-  };
-  while (!is_quant_cnode(x)) {
-    if (count >= max_depth) {
-      break;
-    }
-    auto cnode = x->cast_ptr<CNode>();
-    if (cnode == nullptr || cnode->size() <= 1) {
-      break;
-    }
-    x = cnode->input(1);
-    count += 1;
-  }
-  if (x->isa<Parameter>() || IsPrimitiveCNode(x, prim::kPrimLoad)) {
-    (*fake_quant_table)[weight_name] = std::make_pair(nullptr, "input");
-  }
-  // get the fakequant parameter minq's name
-  if (!is_quant_cnode(x)) {
-    return;
-  }
-  auto cnode = x->cast_ptr<CNode>();
-  constexpr size_t expect_input_size = 4;
-  if (cnode == nullptr || cnode->IsApply(prim::kPrimLoad) || cnode->size() != expect_input_size) {
-    return;
-  }
-  const size_t fakequant_index = 2;
-  auto fakequant_min_node = cnode->input(fakequant_index);
-  if (!fakequant_min_node->isa<Parameter>() && !IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
-    return;
-  }
-  std::string fakequant_min_node_name;
-  if (IsPrimitiveCNode(fakequant_min_node, prim::kPrimLoad)) {
-    fakequant_min_node_name = fakequant_min_node->cast_ptr<CNode>()->input(1)->cast_ptr<Parameter>()->name();
-  } else {
-    auto param = fakequant_min_node->cast_ptr<Parameter>();
-    MS_EXCEPTION_IF_NULL(param);
-    fakequant_min_node_name = param->name();
-  }
-  auto quant_op = GetValuePtr<PrimitivePy>(cnode->input(0));
-  if (quant_op == nullptr) {
-    return;
-  }
-  (*fake_quant_table)[weight_name] = std::make_pair(quant_op->adapter(), fakequant_min_node_name);
-}
-
-std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> GraphExecutorPy::FetchInfoForQuantExport(
-    const std::string &phase) {
-  FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
-  MS_EXCEPTION_IF_NULL(func_graph);
-  MS_LOG(DEBUG) << "FetchInfoForQuantExport func graph(" << func_graph->ToString() << ") phase(" << phase << ")!";
-  std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> fake_quant_table;
-  auto filter = [](const AnfNodePtr &node) {
-    return !(IsPrimitiveCNode(node, prim::kPrimConv2D) || IsPrimitiveCNode(node, prim::kPrimMatMul) ||
-             IsPrimitiveCNode(node, prim::kPrimDepthwiseConv2dNative));
-  };
-  std::vector<AnfNodePtr> nodes = DeepScopedGraphSearchWithFilter(func_graph->get_return(), AlwaysInclude, filter);
-  auto is_quant_cnode = [](const AnfNodePtr &node) {
-    return IsPrimitiveCNode(node, prim::kPrimFakeQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeQuantPerChannel) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerLayer) ||
-           IsPrimitiveCNode(node, prim::kPrimFakeLearnedScaleQuantPerChannel);
-  };
-  const size_t root_node_size = 3;
-  const size_t weight_index = 2;
-  for (const auto &node : nodes) {
-    auto root_node = node->cast<CNodePtr>();
-    if (root_node == nullptr || root_node->size() != root_node_size) {
-      continue;
-    }
-    auto weight = root_node->input(weight_index);
-    if (!is_quant_cnode(weight)) {
-      auto tuple_node = weight->cast_ptr<CNode>();
-      if (tuple_node != nullptr) {
-        auto fake_node = tuple_node->input(1);
-        if (!is_quant_cnode(fake_node)) {
-          continue;
-        } else {
-          weight = fake_node;
-        }
-      }
-    }
-    // get parameter weight's name
-    auto cnode = weight->cast_ptr<CNode>();
-    MS_EXCEPTION_IF_NULL(cnode);
-    auto weight_node = cnode->input(weight_index);
-    MS_EXCEPTION_IF_NULL(weight_node);
-    if (!weight_node->isa<Parameter>() && !IsPrimitiveCNode(weight_node, prim::kPrimLoad)) {
-      continue;
-    }
-    GetWeightInfo(root_node, weight_node, &fake_quant_table);
-  }
-  return fake_quant_table;
-}
-
 void GraphExecutorPy::SaveCompiledGraph(const std::string &phase) {
   // save the graph to GraphExecutorPy
   FuncGraphPtr func_graph = info_[phase]->resource->func_graph();
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h
index b50e89d8e88..cd7ab4d3c63 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.h
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.h
@@ -130,9 +130,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
   void TerminateDebugger();
 #endif
 
-  std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> FetchInfoForQuantExport(
-    const std::string &phase);
-
   // Generate a key for mapping function graph
   py::object GenerateArgumentsKey(const py::object &obj, const py::tuple &args, bool enable_tuple_broaden = false);
 
@@ -140,8 +137,6 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
  private:
   GraphExecutorPy() = default;
-  void GetWeightInfo(const CNodePtr &root_node, const AnfNodePtr &weight_node,
-                     std::map<std::string, std::pair<PrimitivePyAdapterPtr, std::string>> *fake_quant_table) const;
   void ParallelPostProcess(const string &phase);
   void GetGeBackendPolicy() const;
   // filter some pipeline actions according to phase, e.g.
when exporting onnx, it is no need to execute actions after diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc index f56c794f437..3d424d9f2f9 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc @@ -161,7 +161,6 @@ #include "plugin/device/ascend/optimizer/mindir/maxpool_to_maxpool_with_argmax.h" #include "plugin/device/ascend/optimizer/mindir/maxpool_with_argmax_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/optimizer_unify_output.h" -#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/slice_grad_unify_mindir.h" #include "plugin/device/ascend/optimizer/mindir/update_input_names_strided_slice_grad.h" @@ -667,8 +666,6 @@ void AscendUnifyMindIR(const std::shared_ptr &kernel_graph unify_mindir_pm->AddPass(std::make_shared()); unify_mindir_pm->AddPass(std::make_shared()); unify_mindir_pm->AddPass(std::make_shared()); - unify_mindir_pm->AddPass(std::make_shared()); - unify_mindir_pm->AddPass(std::make_shared()); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kGraphMode) { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc deleted file mode 100644 index 1f41d68aa26..00000000000 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.cc +++ /dev/null @@ -1,233 +0,0 @@ -/** - * Copyright 2022 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h" - -#include -#include - -#include "include/common/utils/utils.h" -#include "utils/ms_context.h" -#include "backend/common/optimizer/helper.h" -#include "runtime/device/kernel_info.h" -#include "backend/common/session/anf_runtime_algorithm.h" -#include "include/common/utils/anfalgo.h" -#include "utils/trace_base.h" - -namespace mindspore { -namespace opt { -void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerGradD( - const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - std::vector *const lsq_perlayer_grad_d_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node); - const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs(); - if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size() - << trace::DumpSourceLines(lsq_perlayer_grad_node); - } - std::vector lsq_perlayer_grad_d_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerLayerGradDOpName)), - lsq_perlayer_grad_inputs[kIndex1], lsq_perlayer_grad_inputs[kIndex2], lsq_perlayer_grad_inputs[kIndex3], - lsq_perlayer_grad_inputs[kIndex4]}; - auto lsq_perlayer_grad_d = NewCNode(lsq_perlayer_grad_d_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_d); - lsq_perlayer_grad_d->set_scope(lsq_perlayer_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL), - common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 0UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL), - common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 0UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_grad_d.get()); - - common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perlayer_grad_node, lsq_perlayer_grad_d); - CreateMultipleOutputsOfAnfNode(graph, lsq_perlayer_grad_d, kFakeLearnedScaleQuantGradDOutputNum, - lsq_perlayer_grad_d_outputs); -} - -void FakeLearnedScaleQuantPerLayerGradUnifyMindIR::CreateOutputsOfLSQPerLayerReduceGrad( - const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - const std::vector &lsq_perlayer_grad_d_outputs, - std::vector *const lsq_perlayer_reduce_grad_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_grad_node); - MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad_outputs); - const auto &lsq_perlayer_grad_inputs = lsq_perlayer_grad_node->inputs(); - if (lsq_perlayer_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perlayer_grad_inputs.size() - << trace::DumpSourceLines(lsq_perlayer_grad_node); - } - if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) { - MS_LOG(EXCEPTION) << "Lsq_perlayer_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradDOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size() - << trace::DumpSourceLines(lsq_perlayer_grad_node); - } - std::vector lsq_perlayer_reduce_grad_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerLayerGradDReduceOpName)), - lsq_perlayer_grad_d_outputs[kIndex1]}; - auto 
lsq_perlayer_reduce_grad = NewCNode(lsq_perlayer_reduce_grad_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perlayer_reduce_grad); - lsq_perlayer_reduce_grad->set_scope(lsq_perlayer_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perlayer_grad_node, 1UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perlayer_grad_node, 1UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perlayer_reduce_grad.get()); - - (*lsq_perlayer_reduce_grad_outputs).push_back(lsq_perlayer_reduce_grad); -} - -void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelGradD( - const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - std::vector *const lsq_perchannel_grad_d_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node); - const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs(); - if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size() - << trace::DumpSourceLines(lsq_perchannel_grad_node); - } - std::vector lsq_perchannel_grad_d_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerChannelGradDOpName)), - lsq_perchannel_grad_inputs[kIndex1], lsq_perchannel_grad_inputs[kIndex2], lsq_perchannel_grad_inputs[kIndex3], - lsq_perchannel_grad_inputs[kIndex4]}; - auto lsq_perchannel_grad_d = NewCNode(lsq_perchannel_grad_d_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_d); - lsq_perchannel_grad_d->set_scope(lsq_perchannel_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL), - common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 0UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL), - common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 0UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_grad_d.get()); - - common::AnfAlgo::CopyNodeAttr(kAttrNeg_trunc, lsq_perchannel_grad_node, lsq_perchannel_grad_d); - common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_grad_d); - CreateMultipleOutputsOfAnfNode(graph, lsq_perchannel_grad_d, kFakeLearnedScaleQuantGradDOutputNum, - lsq_perchannel_grad_d_outputs); -} - -void FakeLearnedScaleQuantPerChannelGradUnifyMindIR::CreateOutputsOfLSQPerChannelReduceGrad( - const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - const std::vector &lsq_perchannel_grad_d_outputs, - std::vector *const lsq_perchannel_reduce_grad_outputs) const { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_grad_node); - MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad_outputs); - const auto &lsq_perchannel_grad_inputs = lsq_perchannel_grad_node->inputs(); - if (lsq_perchannel_grad_inputs.size() < kFakeLearnedScaleQuantGradInputNum) { - MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_node has wrong inputs size, should be not less than " - << kFakeLearnedScaleQuantGradInputNum << ", but got " << lsq_perchannel_grad_inputs.size() - << trace::DumpSourceLines(lsq_perchannel_grad_node); - } - if (lsq_perchannel_grad_d_outputs.size() != kFakeLearnedScaleQuantGradDOutputNum) { - MS_LOG(EXCEPTION) << "Lsq_perchannel_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradDOutputNum << ", but 
got " << lsq_perchannel_grad_inputs.size() - << trace::DumpSourceLines(lsq_perchannel_grad_node); - } - std::vector lsq_perchannel_reduce_grad_inputs = { - NewValueNode(std::make_shared(kFakeLearnedScaleQuantPerChannelGradDReduceOpName)), - lsq_perchannel_grad_d_outputs[kIndex1]}; - auto lsq_perchannel_reduce_grad = NewCNode(lsq_perchannel_reduce_grad_inputs, graph); - MS_EXCEPTION_IF_NULL(lsq_perchannel_reduce_grad); - lsq_perchannel_reduce_grad->set_scope(lsq_perchannel_grad_node->scope()); - - auto types = {common::AnfAlgo::GetOutputInferDataType(lsq_perchannel_grad_node, 1UL)}; - auto shapes = {common::AnfAlgo::GetOutputDetailShape(lsq_perchannel_grad_node, 1UL)}; - common::AnfAlgo::SetOutputTypeAndDetailShape(types, shapes, lsq_perchannel_reduce_grad.get()); - common::AnfAlgo::CopyNodeAttr(kAttrChannelAxis, lsq_perchannel_grad_node, lsq_perchannel_reduce_grad); - (*lsq_perchannel_reduce_grad_outputs).push_back(lsq_perchannel_reduce_grad); -} - -const BaseRef FakeLearnedScaleQuantPerLayerGradUnifyMindIR::DefinePattern() const { - VarPtr Xs = std::make_shared(); - auto prim = std::make_shared(kFakeLearnedScaleQuantPerLayerGradOpName); - return VectorRef({prim, Xs}); -} - -const AnfNodePtr FakeLearnedScaleQuantPerLayerGradUnifyMindIR::Process(const FuncGraphPtr &func_graph, - const AnfNodePtr &node, const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(func_graph); - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(primitive); - - std::vector lsq_perlayer_grad_d_outputs; - CreateOutputsOfLSQPerLayerGradD(func_graph, cnode, &lsq_perlayer_grad_d_outputs); - if (lsq_perlayer_grad_d_outputs.size() != kFakeLearnedScaleQuantGradOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perlayer_grad_d_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector lsq_perlayer_reduce_grad_outputs; - CreateOutputsOfLSQPerLayerReduceGrad(func_graph, cnode, lsq_perlayer_grad_d_outputs, - &lsq_perlayer_reduce_grad_outputs); - if (lsq_perlayer_reduce_grad_outputs.size() != kSingleOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perlayer_reduce_grad_outputs has wrong inputs size, should be " - << kSingleOutputNum << ", but got " << lsq_perlayer_reduce_grad_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perlayer_grad_d_outputs[0], - lsq_perlayer_reduce_grad_outputs[0]}; - auto make_tuple = func_graph->NewCNode(make_tuple_inputs); - return make_tuple; -} - -const BaseRef FakeLearnedScaleQuantPerChannelGradUnifyMindIR::DefinePattern() const { - VarPtr Xs = std::make_shared(); - auto prim = std::make_shared(kFakeLearnedScaleQuantPerChannelGradOpName); - return VectorRef({prim, Xs}); -} - -const AnfNodePtr FakeLearnedScaleQuantPerChannelGradUnifyMindIR::Process(const FuncGraphPtr &func_graph, - const AnfNodePtr &node, - const EquivPtr &) const { - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(func_graph); - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - auto primitive = common::AnfAlgo::GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(primitive); - - std::vector lsq_perchannel_grad_d_outputs; - CreateOutputsOfLSQPerChannelGradD(func_graph, cnode, &lsq_perchannel_grad_d_outputs); - if (lsq_perchannel_grad_d_outputs.size() != 
kFakeLearnedScaleQuantGradOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_grad_d_outputs has wrong inputs size, should be " - << kFakeLearnedScaleQuantGradOutputNum << ", but got " << lsq_perchannel_grad_d_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector lsq_perchannel_reduce_grad_outputs; - CreateOutputsOfLSQPerChannelReduceGrad(func_graph, cnode, lsq_perchannel_grad_d_outputs, - &lsq_perchannel_reduce_grad_outputs); - if (lsq_perchannel_reduce_grad_outputs.size() != kSingleOutputNum) { - MS_LOG(EXCEPTION) << "Fake_learned_scale_quant_perchannel_reduce_grad_outputs has wrong inputs size, should be " - << kSingleOutputNum << ", but got " << lsq_perchannel_reduce_grad_outputs.size() - << trace::DumpSourceLines(node); - } - - std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), lsq_perchannel_grad_d_outputs[0], - lsq_perchannel_reduce_grad_outputs[0]}; - auto make_tuple = func_graph->NewCNode(make_tuple_inputs); - return make_tuple; -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h deleted file mode 100644 index 79d05ef7717..00000000000 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/fake_learned_scale_quant_grad_unify_mindir.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright 2022 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_ -#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_ - -#include -#include "backend/common/optimizer/optimizer.h" -#include "backend/common/optimizer/helper.h" - -namespace mindspore { -namespace opt { -constexpr size_t kFakeLearnedScaleQuantGradOutputNum = 2; -constexpr size_t kFakeLearnedScaleQuantGradInputNum = 5; -constexpr size_t kFakeLearnedScaleQuantGradDOutputNum = 2; -constexpr auto kFakeLearnedScaleQuantPerLayerGradOpName = "FakeLearnedScaleQuantPerLayerGrad"; -constexpr auto kFakeLearnedScaleQuantPerLayerGradDOpName = "FakeLearnedScaleQuantPerLayerGradD"; -constexpr auto kFakeLearnedScaleQuantPerLayerGradDReduceOpName = "FakeLearnedScaleQuantPerLayerGradDReduce"; -constexpr auto kFakeLearnedScaleQuantPerChannelGradOpName = "FakeLearnedScaleQuantPerChannelGrad"; -constexpr auto kFakeLearnedScaleQuantPerChannelGradDOpName = "FakeLearnedScaleQuantPerChannelGradD"; -constexpr auto kFakeLearnedScaleQuantPerChannelGradDReduceOpName = "FakeLearnedScaleQuantPerChannelGradDReduce"; - -constexpr auto kAttrNeg_trunc = "neg_trunc"; -constexpr auto kAttrChannelAxis = "channel_axis"; - -class FakeLearnedScaleQuantPerLayerGradUnifyMindIR : public PatternProcessPass { - public: - explicit FakeLearnedScaleQuantPerLayerGradUnifyMindIR(bool multigraph = true) - : PatternProcessPass("fake_learned_scale_quant_perlayer_grad_unify_mindir", multigraph) {} - ~FakeLearnedScaleQuantPerLayerGradUnifyMindIR() override = default; - const BaseRef DefinePattern() const override; - const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; - - private: - void CreateOutputsOfLSQPerLayerGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - std::vector *const lsq_perlayer_grad_d_outputs) const; - void CreateOutputsOfLSQPerLayerReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perlayer_grad_node, - const std::vector &lsq_perlayer_grad_d_outputs, - std::vector *const lsq_perlayer_reduce_grad_outputs) const; -}; - -class FakeLearnedScaleQuantPerChannelGradUnifyMindIR : public PatternProcessPass { - public: - explicit FakeLearnedScaleQuantPerChannelGradUnifyMindIR(bool multigraph = true) - : PatternProcessPass("fake_learned_scale_quant_perchannel_grad_unify_mindir", multigraph) {} - ~FakeLearnedScaleQuantPerChannelGradUnifyMindIR() override = default; - const BaseRef DefinePattern() const override; - const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; - - private: - void CreateOutputsOfLSQPerChannelGradD(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - std::vector *const lsq_perchannel_grad_d_outputs) const; - void CreateOutputsOfLSQPerChannelReduceGrad(const FuncGraphPtr &graph, const CNodePtr &lsq_perchannel_grad_node, - const std::vector &lsq_perchannel_grad_d_outputs, - std::vector *const lsq_perchannel_reduce_grad_outputs) const; -}; - -} // namespace opt -} // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_MINDIR_FAKE_LEARNED_SCALE_QUANT_GRAD_UNIFY_MINDIR_H_ diff --git a/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h index 3b3ceb024dc..504e3fe77df 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h @@ -220,10 +220,6 @@ constexpr const char kNameXlogy[] = "Xlogy"; 
constexpr const char kNameReLUV2[] = "ReLUV2"; constexpr const char kNameAccumulateNV2[] = "AccumulateNV2"; constexpr const char kNameConfusionMulGrad[] = "ConfusionMulGrad"; -constexpr const char kNameFakeQuantWithMinMaxVars[] = "FakeQuantWithMinMaxVars"; -constexpr const char kNameFakeQuantWithMinMaxVarsGradient[] = "FakeQuantWithMinMaxVarsGradient"; -constexpr const char kNameFakeQuantWithMinMaxVarsPerChannel[] = "FakeQuantWithMinMaxVarsPerChannel"; -constexpr const char kNameFakeQuantWithMinMaxVarsPerChannelGradient[] = "FakeQuantWithMinMaxVarsPerChannelGradient"; constexpr const char kNameActsULQ[] = "ActsULQ"; constexpr const char kNameActsULQInputGrad[] = "ActsULQInputGrad"; constexpr const char kNameActULQClampMaxGrad[] = "ActULQClampMaxGrad"; diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc index eaf99f85fb8..a3fddc286f5 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.cc @@ -56,41 +56,6 @@ ATTR_MAP(ConfusionMulGrad) = {{"axes", ATTR_DESC(axes, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVars) = {{0, OUTPUT_DESC(y)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVars, kNameFakeQuantWithMinMaxVars, ADPT_DESC(FakeQuantWithMinMaxVars)) - -// FakeQuantWithMinMaxVarsGradient -INPUT_MAP(FakeQuantWithMinMaxVarsGradient) = { - {1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}}; -ATTR_MAP(FakeQuantWithMinMaxVarsGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVarsGradient) = { - {0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVarsGradient, kNameFakeQuantWithMinMaxVarsGradient, - ADPT_DESC(FakeQuantWithMinMaxVarsGradient)) - -// FakeQuantWithMinMaxVarsPerChannel -INPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(min)}, {3, INPUT_DESC(max)}}; -ATTR_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannel) = {{0, OUTPUT_DESC(y)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel, kNameFakeQuantWithMinMaxVarsPerChannel, - ADPT_DESC(FakeQuantWithMinMaxVarsPerChannel)) - -// FakeQuantWithMinMaxVarsPerChannelGradient -INPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = { - {1, INPUT_DESC(gradients)}, {2, INPUT_DESC(x)}, {3, INPUT_DESC(min)}, {4, INPUT_DESC(max)}}; -ATTR_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = {{"num_bits", ATTR_DESC(num_bits, AnyTraits())}, - {"narrow_range", ATTR_DESC(narrow_range, AnyTraits())}}; -OUTPUT_MAP(FakeQuantWithMinMaxVarsPerChannelGradient) = { - {0, OUTPUT_DESC(backprops_wrt_x)}, {1, OUTPUT_DESC(backprops_wrt_min)}, {2, OUTPUT_DESC(backprops_wrt_max)}}; -REG_ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient, kNameFakeQuantWithMinMaxVarsPerChannelGradient, - ADPT_DESC(FakeQuantWithMinMaxVarsPerChannelGradient)) - // GreaterEqual INPUT_MAP(GreaterEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}}; ATTR_MAP(GreaterEqual) = EMPTY_ATTR_MAP; diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h 
b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h index c2bf528867b..a55452c2722 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/elewise_calculation_ops_declare.h @@ -32,18 +32,6 @@ DECLARE_OP_USE_OUTPUT(AccumulateNV2) DECLARE_OP_ADAPTER(ConfusionMulGrad) DECLARE_OP_USE_OUTPUT(ConfusionMulGrad) -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVars) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVars) - -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsGradient) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsGradient) - -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannel) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannel) - -DECLARE_OP_ADAPTER(FakeQuantWithMinMaxVarsPerChannelGradient) -DECLARE_OP_USE_OUTPUT(FakeQuantWithMinMaxVarsPerChannelGradient) - DECLARE_OP_ADAPTER(GreaterEqual) DECLARE_OP_USE_OUTPUT(GreaterEqual) diff --git a/mindspore/python/mindspore/common/api.py b/mindspore/python/mindspore/common/api.py index d1e58a1a2b7..260f4010173 100644 --- a/mindspore/python/mindspore/common/api.py +++ b/mindspore/python/mindspore/common/api.py @@ -1487,12 +1487,6 @@ class _CellGraphExecutor: """ self._graph_executor.export_graph(file_name, graph_id, encrypt_func, enc_key) - def fetch_info_for_quant_export(self, exec_id): - """Get graph proto from pipeline.""" - if self._graph_executor.has_compiled(exec_id) is False: - return None - return self._graph_executor.fetch_info_for_quant_export(exec_id) - def ms_memory_recycle(): """ diff --git a/mindspore/python/mindspore/compression/OWNERS b/mindspore/python/mindspore/compression/OWNERS deleted file mode 100644 index b71327621c6..00000000000 --- a/mindspore/python/mindspore/compression/OWNERS +++ /dev/null @@ -1,4 +0,0 @@ -approvers: -- zhang_xue_tong -- jpc_chenjianping -- hangangqiang diff --git a/mindspore/python/mindspore/compression/__init__.py b/mindspore/python/mindspore/compression/__init__.py deleted file mode 100644 index d6d80fd6a10..00000000000 --- a/mindspore/python/mindspore/compression/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -MindSpore compression module. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" diff --git a/mindspore/python/mindspore/compression/common/__init__.py b/mindspore/python/mindspore/compression/common/__init__.py deleted file mode 100644 index e4f28e7823b..00000000000 --- a/mindspore/python/mindspore/compression/common/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Common module for various compression algorithms, now only including datatype definition for quantization. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" - -from __future__ import absolute_import -from mindspore.compression.common.constant import QuantDtype - -__all__ = ["QuantDtype"] diff --git a/mindspore/python/mindspore/compression/common/constant.py b/mindspore/python/mindspore/compression/common/constant.py deleted file mode 100644 index 1ab4bb7e903..00000000000 --- a/mindspore/python/mindspore/compression/common/constant.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Note: - Constant module for compression. This is interface that is subject to change or deletion. -""" -from __future__ import absolute_import - -import enum -import re -from types import DynamicClassAttribute - - -__all__ = ["QuantDtype"] - - -@enum.unique -class QuantDtype(enum.Enum): - """ - An enum for quant datatype, contains `INT2` ~ `INT8`, `UINT2` ~ `UINT8`. - """ - INT2 = "INT2" - INT3 = "INT3" - INT4 = "INT4" - INT5 = "INT5" - INT6 = "INT6" - INT7 = "INT7" - INT8 = "INT8" - - UINT2 = "UINT2" - UINT3 = "UINT3" - UINT4 = "UINT4" - UINT5 = "UINT5" - UINT6 = "UINT6" - UINT7 = "UINT7" - UINT8 = "UINT8" - - def __str__(self): - return f"{self.name}" - - @staticmethod - def is_signed(dtype): - """ - Get whether the quant datatype is signed. - - Args: - dtype (QuantDtype): quant datatype. - - Returns: - bool, whether the input quant datatype is signed. - - Examples: - >>> quant_dtype = QuantDtype.INT8 - >>> is_signed = QuantDtype.is_signed(quant_dtype) - """ - return dtype in [QuantDtype.INT2, QuantDtype.INT3, QuantDtype.INT4, QuantDtype.INT5, - QuantDtype.INT6, QuantDtype.INT7, QuantDtype.INT8] - - @staticmethod - def switch_signed(dtype): - """ - Switch the signed state of the input quant datatype. - - Args: - dtype (QuantDtype): quant datatype. - - Returns: - QuantDtype, quant datatype with opposite signed state as the input. 
- - Examples: - >>> quant_dtype = QuantDtype.INT8 - >>> quant_dtype = QuantDtype.switch_signed(quant_dtype) - """ - type_map = { - QuantDtype.INT2: QuantDtype.UINT2, - QuantDtype.INT3: QuantDtype.UINT3, - QuantDtype.INT4: QuantDtype.UINT4, - QuantDtype.INT5: QuantDtype.UINT5, - QuantDtype.INT6: QuantDtype.UINT6, - QuantDtype.INT7: QuantDtype.UINT7, - QuantDtype.INT8: QuantDtype.UINT8, - QuantDtype.UINT2: QuantDtype.INT2, - QuantDtype.UINT3: QuantDtype.INT3, - QuantDtype.UINT4: QuantDtype.INT4, - QuantDtype.UINT5: QuantDtype.INT5, - QuantDtype.UINT6: QuantDtype.INT6, - QuantDtype.UINT7: QuantDtype.INT7, - QuantDtype.UINT8: QuantDtype.INT8 - } - return type_map.get(dtype) - - @DynamicClassAttribute - def _value(self): - """The value of the Enum member.""" - return int(re.search(r"(\d+)", self._value_).group(1)) - - @DynamicClassAttribute - def num_bits(self): - """ - Get the num bits of the QuantDtype member. - - Returns: - int, the num bits of the QuantDtype member. - - Examples: - >>> from mindspore.compression.common import QuantDtype - >>> quant_dtype = QuantDtype.INT8 - >>> num_bits = quant_dtype.num_bits - >>> print(num_bits) - 8 - """ - return self._value diff --git a/mindspore/python/mindspore/compression/export/__init__.py b/mindspore/python/mindspore/compression/export/__init__.py deleted file mode 100644 index 139f7d7daae..00000000000 --- a/mindspore/python/mindspore/compression/export/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Compression export module. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" diff --git a/mindspore/python/mindspore/compression/export/quant_export.py b/mindspore/python/mindspore/compression/export/quant_export.py deleted file mode 100644 index d01244e5828..00000000000 --- a/mindspore/python/mindspore/compression/export/quant_export.py +++ /dev/null @@ -1,515 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Note: - Export for quantization. This is interface that is subject to change or deletion. 
-""" - -from __future__ import absolute_import - -import copy - -import numpy as np - -from mindspore import log as logger -from mindspore import nn, ops -from mindspore._checkparam import Validator -from mindspore.common import Tensor -from mindspore.common import dtype as mstype -from mindspore.common.api import _cell_graph_executor as _executor -from mindspore.common.parameter import Parameter -from mindspore.nn import Cell -from mindspore.nn.layer import quant -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.ops.operations import _inner_ops as inner -from mindspore.compression.quant import quant_utils -from mindspore.compression.quant.qat import _AddFakeQuantInput, _AddFakeQuantAfterSubCell - -__all__ = ["ExportToQuantInferNetwork"] - - -class QuantBlock(Cell): - r""" - A quant block of Conv/Dense, activation layer for Ascend deploy. - - Calculate Conv or Dense in Int8, with Quant and DeQuant. - - Notes: - This block is only for deploy, and not trainable. - - Args: - in_channels (int): The number of channels in the input space. - out_channels (int): The number of channels in the output space. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype - is same as input x. The values of str refer to the function `initializer`. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is - same as input x. The values of str refer to the function `initializer`. Default: 'zeros'. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. - activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None. - batchnorm (bool): Specifies to used batchnorm or not. Default: None. - activation (string): Specifies activation type. The optional values are as following: - 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', - 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. - - Inputs: - - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. - - Outputs: - Tensor of shape :math:`(N, out\_channels)`. - """ - - def __init__(self, - core_op, - weight, - quant_op, - dequant_op, - dequant_scale, - bias=None, - activation=None): - super(QuantBlock, self).__init__() - self.core_op = core_op - self.weight = weight - self.quant = quant_op - self.dequant = dequant_op - self.dequant_scale = dequant_scale - self.bias = bias - self.has_bias = bias is not None - self.activation = activation - self.has_act = activation is not None - self.bias_add = P.BiasAdd() - self.sub = P.Sub() - self.weight_offset = Parameter(np.zeros(1, dtype=np.int8), name='weight_offset') - - def construct(self, x): - x = self.quant(x) - if self.has_bias: - weight = self.sub(self.weight, self.weight_offset) - x = self.core_op(x, weight) - x = self.bias_add(x, self.bias) - else: - x = self.core_op(x, self.weight) - x = self.dequant(x, self.dequant_scale) - x = F.cast(x, mstype.float32) - if self.has_act: - x = self.activation(x) - return x - - def extend_repr(self): - s = f'quant={self.quant}, core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]' - if self.has_bias: - s += f', bias=shape[{self.bias.shape}]' - if self.has_act: - s += f', activation={self.activation}' - s += f', dequant={self.dequant}' - return s - - -class QuantMindirBlock(Cell): - """A quant binary block of Conv/Dense, activation layer for export MINDIR model. 
- - Args: - core_op (Cell): The operation cell. - weight (Tensor): The weight of the cell. - bias (Tensor): The bias of the cell. Default: None. - activation (str): The regularization function applied to the output of the layer, eg. 'relu'. Default: None. - param_dict (dict): The information of the cell. - """ - - def __init__(self, - core_op, - weight, - bias=None, - activation=None, - param_dict=None): - - super(QuantMindirBlock, self).__init__() - self.core_op = core_op - if activation is not None: - self.core_op.add_prim_attr("activation_name", activation.__class__.__name__) - self.core_op.add_prim_attr("filter_maxq", Tensor(param_dict["filter_maxq"])) - self.core_op.add_prim_attr("filter_minq", Tensor(param_dict["filter_minq"])) - if param_dict["output_maxq"] is not None: - self.core_op.add_prim_attr("output_maxq", Tensor(param_dict["output_maxq"])) - self.core_op.add_prim_attr("output_minq", Tensor(param_dict["output_minq"])) - self.core_op.add_prim_attr("symmetric", Tensor(param_dict["symmetric"])) - if hasattr(core_op, 'pad_mode'): - self.core_op.add_prim_attr("pad_mode", core_op.pad_mode) - self.core_op.add_prim_attr("act_num_bits", Tensor(8)) - self.core_op.add_prim_attr("weight_num_bits", Tensor(param_dict["weight_num_bits"])) - self.core_op.add_prim_attr("weight_narrow_range", Tensor(param_dict["weight_narrow_range"])) - if param_dict["input_narrow_range"] is not None: - self.core_op.add_prim_attr("input_narrow_range", Tensor(param_dict["input_narrow_range"])) - if param_dict["output_narrow_range"] is not None: - self.core_op.add_prim_attr("output_narrow_range", Tensor(param_dict["output_narrow_range"])) - if param_dict["input_maxq"] == 'None': - self.core_op.add_prim_attr("mean", Tensor(param_dict["mean"])) - self.core_op.add_prim_attr("std_dev", Tensor(param_dict["std_dev"])) - elif param_dict["input_maxq"] is not None: - self.core_op.add_prim_attr("input_maxq", Tensor(param_dict["input_maxq"])) - self.core_op.add_prim_attr("input_minq", Tensor(param_dict["input_minq"])) - - self.weight = weight - self.bias = bias - self.has_bias = bias is not None - self.activation = activation - self.has_act = activation is not None - self.bias_add = P.BiasAdd() - - def construct(self, x): - if self.has_bias: - x = self.core_op(x, self.weight) - x = self.bias_add(x, self.bias) - else: - x = self.core_op(x, self.weight) - if self.has_act: - x = self.activation(x) - return x - - def extend_repr(self): - s = f'core_op={type(self.core_op)}, weight=shape[{self.weight.shape}]' - if self.has_bias: - s += f', bias=shape[{self.bias.shape}]' - if self.has_act: - s += f', activation={self.activation}' - return s - - -class ExportToQuantInferNetwork: - """ - Convert quantization aware network to infer network. - - Args: - network (Cell): MindSpore quantization aware training network. - inputs (Tensor): Input tensors of the `quantization aware training network`. - mean (int, float): The mean of input data after preprocessing, used for quantizing the first layer of network. - Default: 127.5. - std_dev (int, float): The variance of input data after preprocessing, used for quantizing the first layer - of network. Default: 127.5. - is_mindir (bool): Whether export MINDIR format. Default: False. - - Returns: - Cell, Infer network. 
- """ - - def __init__(self, network, mean, std_dev, *inputs, is_mindir=False): - network = Validator.check_isinstance('network', network, (nn.Cell,)) - self.data_type = mstype.int8 - self.network = copy.deepcopy(network) - self.network_bk = copy.deepcopy(network) - self.get_inputs_table(inputs) - self.mean = mean - self.std_dev = std_dev - self.is_mindir = is_mindir - self.upcell = None - - @staticmethod - def __get_dequant_scale(scale_a_in, scale_w): - """Get dequant scale""" - scale_deq = scale_a_in * scale_w - - # fuse parameter - # |--------|47:40|--------|39:32|--------|31:0| - # offset_w [8] shift_N [8] deq_scale [32] - float32_deq_scale = scale_deq.astype(np.float32) - uint32_deq_scale = np.frombuffer(float32_deq_scale, np.uint32) - scale_length = scale_deq.size # channel - dequant_param = np.zeros(scale_length, dtype=np.uint64) - for index in range(scale_length): - dequant_param[index] += uint32_deq_scale[index] - scale_deq = Tensor(dequant_param, mstype.uint64) - return scale_deq - - def get_inputs_table(self, inputs): - """Get the input quantization parameters of quantization cell for quant export.""" - phase_name = 'export_quant' - graph_id, _ = _executor.compile(self.network, *inputs, phase=phase_name, do_convert=False) - self.quant_info_table = _executor.fetch_info_for_quant_export(graph_id) - - def run(self): - """Start to convert.""" - logger.warning("The compression module is deprecated and may not be supported in later version, please use " - "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.") - self.network.update_cell_prefix() - network = self.network - if isinstance(network, _AddFakeQuantInput): - network = network.network - network = self._convert_quant2deploy(network) - return network - - def _get_quant_block(self, cell_core, activation, fake_quant_a_out): - """convert network's quant subcell to deploy subcell""" - scale_a_in, zp_a_in, scale_w, zp_w, param_dict = self.__get_quant_param(cell_core, fake_quant_a_out) - - # Build the `Quant` `Dequant` op. - # Quant only support perlayer version. Need check here. 
- if float(scale_a_in) == 0: - raise ValueError("If `scale_a_in` is zero, will lead to zero error.") - quant_op = inner.Quant(1 / float(scale_a_in), float(zp_a_in)) - scale_deq = self.__get_dequant_scale(scale_a_in, scale_w) - dequant_op = inner.Dequant() - - if isinstance(activation, _AddFakeQuantAfterSubCell): - activation = activation.subcell - elif hasattr(activation, "get_origin"): - activation = activation.get_origin() - - # get op - if isinstance(cell_core, quant.DenseQuant): - op_core = P.MatMul() - else: - op_core = cell_core.conv - - # get the `weight` and `bias` - weight, bias, weight_b, bias_b = self.__get_weight_bias(cell_core, scale_a_in, scale_w, zp_w) - - if self.is_mindir: - block = QuantMindirBlock(op_core, weight_b, bias_b, activation, param_dict) - else: - block = QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) - return block - - def _get_input_quant_param(self, minq_name, np_type, param_dict): - """get input quant parameter for quant block""" - fake_quant_a_in_prefix = minq_name[:-5] - cells = self.network_bk.cells_and_names() - for cell in cells: - if cell[0].endswith(fake_quant_a_in_prefix): - fake_quant_a_in = cell[1] - break - scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ - quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_in, np_type) - param_dict["input_narrow_range"] = fake_quant_a_in.narrow_range - return scale_a_in, zp_a_in - - def __get_quant_param(self, cell_core, fake_quant_a_out): - """get parameter for quant block""" - w_minq_name = cell_core.fake_quant_weight.minq.name - w_maxq_name = cell_core.fake_quant_weight.maxq.name - np_type = mstype.dtype_to_nptype(self.data_type) - param_dict = dict() - param_dict["filter_maxq"] = None - param_dict["filter_minq"] = None - param_dict["output_maxq"] = None - param_dict["output_minq"] = None - param_dict["input_maxq"] = None - param_dict["input_minq"] = None - param_dict["input_narrow_range"] = None - param_dict["output_narrow_range"] = None - param_dict["weight_narrow_range"] = cell_core.fake_quant_weight.narrow_range - param_dict["mean"] = self.mean - param_dict["std_dev"] = self.std_dev - param_dict["symmetric"] = cell_core.fake_quant_weight.symmetric - param_dict["weight_num_bits"] = cell_core.fake_quant_weight.num_bits - - scale_w, zp_w, param_dict["filter_maxq"], param_dict["filter_minq"] = \ - quant_utils.scale_zp_max_min_from_fake_quant_cell(cell_core.fake_quant_weight, np_type) - if fake_quant_a_out is not None: - _, _, param_dict["output_maxq"], param_dict["output_minq"] = \ - quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_a_out, np_type) - param_dict["output_narrow_range"] = fake_quant_a_out.narrow_range - - info = self.quant_info_table.get(w_minq_name, None) - if not info: - info = self.quant_info_table.get(w_maxq_name, None) - if info: - _, minq_name = info - if minq_name == 'input': - scale_a_in, zp_a_in, param_dict["input_maxq"], param_dict["input_minq"] = \ - (1 / self.std_dev), round(self.mean), 'None', 'None' - else: - scale_a_in, zp_a_in = self._get_input_quant_param(minq_name, np_type, param_dict) - else: - # skip quant layer - scale_a_in, zp_a_in = 1.0, 0.0 - return scale_a_in, zp_a_in, scale_w, zp_w, param_dict - - def __get_weight_bias(self, cell_core, scale_a_in, scale_w, zp_w): - """Get weight and bias for quantizaiton""" - np_type = mstype.dtype_to_nptype(self.data_type) - weight = cell_core.weight.data.asnumpy() - bias = None - if isinstance(cell_core, (quant.DenseQuant, quant.Conv2dQuant)): - if 
cell_core.has_bias: - bias = cell_core.bias.data.asnumpy() - elif isinstance(cell_core, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv)): - weight, bias = quant_utils.fold_batchnorm(weight, cell_core) - elif isinstance(cell_core, quant.Conv2dBnWithoutFoldQuant): - weight, bias = quant_utils.without_fold_batchnorm(weight, cell_core) - weight_b = weight - bias_b = bias - # apply the quant - quant_min, quant_max = quant_utils.get_quant_min_max(np_type, - cell_core.fake_quant_weight.num_bits, - cell_core.fake_quant_weight.narrow_range) - weight = quant_utils.weight2int(weight, scale_w, zp_w, quant_min, quant_max) - if bias is not None: - if 0 in scale_a_in: - raise ValueError("Zero exist in `scale_a_in` which will lead to divide zero error.") - if 0 in scale_w: - raise ValueError("Zero exist in `scale_w` which will lead to divide zero error.") - bias = Tensor(bias / scale_a_in / scale_w, mstype.int32) - - if isinstance(cell_core, quant.DenseQuant): - weight = np.transpose(weight) - weight_b = np.transpose(weight_b) - - weight_tensor = Tensor(weight, self.data_type) - weight_b_tensor = Tensor(weight_b) - if bias_b is not None: - bias_b_tensor = Tensor(bias_b, mstype.float32) - return weight_tensor, bias, weight_b_tensor, bias_b_tensor - return weight_tensor, bias, weight_b_tensor, None - - def _add_output_min_max_for_op(self, origin_op, fake_quant_cell): - """add output quant info for quant op for export mindir.""" - if self.is_mindir: - if isinstance(origin_op, ops.Primitive) and not hasattr(origin_op, 'output_minq'): - np_type = mstype.dtype_to_nptype(self.data_type) - _, _, maxq, minq = quant_utils.scale_zp_max_min_from_fake_quant_cell(fake_quant_cell, np_type) - origin_op.add_prim_attr('output_maxq', Tensor(maxq)) - origin_op.add_prim_attr('output_minq', Tensor(minq)) - - def _convert_subcell(self, network, change, name, subcell): - """Convert subcell to ant subcell.""" - if subcell is not None and hasattr(subcell, "fake_quant_weight"): - new_subcell = self._get_quant_block(subcell, None, None) - prefix = subcell.param_prefix - new_subcell.update_parameters_name(prefix + '.') - self.upcell = new_subcell - network.insert_child_to_cell(name, new_subcell) - change = True - return network, change - - def _convert_conv(self, network, change, name, subcell): - """Convert subcell to ant subcell for conv.""" - cell_core = subcell.conv - activation = subcell.activation - fake_quant_act = None - if hasattr(activation, 'fake_quant_act_before'): - fake_quant_act = activation.fake_quant_act_before - elif hasattr(activation, 'fake_quant_act'): - fake_quant_act = activation.fake_quant_act - if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): - new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) - self.upcell = None - prefix = subcell.param_prefix - new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - change = True - return network, change - - def _convert_dense(self, network, change, name, subcell): - """Convert subcell to ant subcell for dense.""" - cell_core = subcell.dense - activation = subcell.activation - fake_quant_act = None - if hasattr(activation, 'fake_quant_act_before'): - fake_quant_act = activation.fake_quant_act_before - elif hasattr(activation, 'fake_quant_act'): - fake_quant_act = activation.fake_quant_act - if cell_core is not None and hasattr(cell_core, "fake_quant_weight"): - new_subcell = self._get_quant_block(cell_core, activation, fake_quant_act) - prefix = subcell.param_prefix - 
new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - self.upcell = None - change = True - return network, change - - def _convert_act(self, subcell): - """Convert subcell to ant subcell for activation.""" - activation = subcell.get_origin() - if isinstance(activation, nn.ReLU): - self._add_output_min_max_for_op(activation.relu, subcell.fake_quant_act) - elif isinstance(activation, nn.ReLU6): - self._add_output_min_max_for_op(activation.relu6, subcell.fake_quant_act) - if self.upcell: - self._add_output_min_max_for_op(self.upcell.core_op, subcell.fake_quant_act) - return activation - - def _convert_add(self, subcell): - """Convert subcell to ant subcell for add.""" - if isinstance(subcell.add, _AddFakeQuantAfterSubCell): - add_op = subcell.add.subcell - subcell.__delattr__("add") - subcell.__setattr__("add", add_op) - add_op = subcell.add - self._add_output_min_max_for_op(add_op, subcell.fake_quant_act) - subcell.__delattr__("fake_quant_act") - subcell.__setattr__("fake_quant_act", P.identity()) - - def _convert_observer(self, network, name, subcell): - """Convert subcell to ant subcell for FakeQuantWithMinMaxObserver.""" - if self.upcell: - self._add_output_min_max_for_op(self.upcell.core_op, subcell) - network.__delattr__(name) - network.__setattr__(name, P.identity()) - - def _convert_fake_quant_after_cell(self, network, name, subcell): - """Convert subcell to ant subcell for _AddFakeQuantAfterSubCell.""" - op = subcell.subcell - self._add_output_min_max_for_op(op, subcell.fake_quant_act) - network.__delattr__(name) - network.__setattr__(name, op) - - def _convert_core_quant_subcell(self, network, change, name, subcell): - """Convert subcell to ant subcell for conv and dense.""" - is_core_subcell = True - if isinstance(subcell, nn.Conv2dBnAct): - network, change = self._convert_conv(network, change, name, subcell) - elif isinstance(subcell, nn.DenseBnAct): - network, change = self._convert_dense(network, change, name, subcell) - elif isinstance(subcell, (quant.Conv2dBnFoldQuant, quant.Conv2dBnFoldQuantOneConv, - quant.Conv2dBnWithoutFoldQuant, quant.Conv2dQuant, quant.DenseQuant)): - network, change = self._convert_subcell(network, change, name, subcell) - else: - is_core_subcell = False - return is_core_subcell, network, change - - def _convert_other_quant_subcell(self, network, change, name, subcell): - """Convert subcell to ant subcell for cell except conv and dense.""" - is_other_subcell = True - if isinstance(subcell, nn.ActQuant) and hasattr(subcell, "get_origin"): - activation = self._convert_act(subcell) - network.insert_child_to_cell(name, activation) - change = True - elif isinstance(subcell, nn.TensorAddQuant): - self._convert_add(subcell) - elif isinstance(subcell, quant.FakeQuantWithMinMaxObserver): - self._convert_observer(network, name, subcell) - elif isinstance(subcell, _AddFakeQuantAfterSubCell): - self._convert_fake_quant_after_cell(network, name, subcell) - change = True - else: - is_other_subcell = False - return is_other_subcell, network, change - - def _convert_quant2deploy(self, network): - """Convert network's all quant subcell to deploy subcell.""" - cells = network.name_cells() - change = False - for name in cells: - subcell = cells[name] - if subcell == network: - continue - is_core_quant_subcell, network, change = self._convert_core_quant_subcell(network, change, name, subcell) - is_other_quant_subcell, network, change = self._convert_other_quant_subcell(network, change, name, subcell) - if not 
is_core_quant_subcell and not is_other_quant_subcell: - self.upcell = None - self._convert_quant2deploy(subcell) - if isinstance(network, nn.SequentialCell) and change: - network.cell_list = list(network.cells()) - return network diff --git a/mindspore/python/mindspore/compression/quant/__init__.py b/mindspore/python/mindspore/compression/quant/__init__.py deleted file mode 100644 index 7c098f17604..00000000000 --- a/mindspore/python/mindspore/compression/quant/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Quantization module, including base class of the quantizer, the quantization aware training algorithm, -and quantization utils. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" - -from __future__ import absolute_import -from .quantizer import OptimizeOption -from .qat import QuantizationAwareTraining, create_quant_config -from .quant_utils import load_nonquant_param_into_quant_net, query_quant_layers - -__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers", "QuantizationAwareTraining", - "create_quant_config", "OptimizeOption"] diff --git a/mindspore/python/mindspore/compression/quant/qat.py b/mindspore/python/mindspore/compression/quant/qat.py deleted file mode 100644 index a1826c6923b..00000000000 --- a/mindspore/python/mindspore/compression/quant/qat.py +++ /dev/null @@ -1,634 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Quantization aware training - -User can use quantization aware to train a model. MindSpore supports quantization aware training, -which models quantization errors in both the forward and backward passes using fake-quantization -operations. Note that the entire computation is carried out in floating point. At the end of quantization -aware training, MindSpore provides conversion functions to convert the trained model into lower precision. - -Note: This is an experimental interface that is subject to change and/or deletion. 
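The `_convert_quant2deploy` pass that ends above, like `_convert_subcells2quant` further down, follows one traversal pattern: fetch the immediate children from `name_cells()`, swap a child in place with `insert_child_to_cell`, recurse into everything else, and refresh `cell_list` when the parent is a `SequentialCell`. A minimal sketch of that pattern, with a hypothetical ReLU6-to-ReLU swap standing in for the quant-to-deploy conversion (names are illustrative, not part of the removed API):

import mindspore.nn as nn

def replace_cells(network, old_type=nn.ReLU6, new_factory=nn.ReLU):
    """Recursively swap every child of `old_type` for `new_factory()` (illustrative only)."""
    changed = False
    cells = network.name_cells()
    for name in cells:
        subcell = cells[name]
        if subcell is network:
            continue
        if isinstance(subcell, old_type):
            network.insert_child_to_cell(name, new_factory())
            changed = True
        else:
            replace_cells(subcell, old_type, new_factory)
    if isinstance(network, nn.SequentialCell) and changed:
        # keep the sequential execution list in sync with the swapped children
        network.cell_list = list(network.cells())
    return network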
-""" - -from __future__ import absolute_import -import re -import numpy as np -import mindspore.context as context -from mindspore import log as logger -from mindspore import nn, ops -from mindspore._checkparam import Validator, Rel -from mindspore.nn.layer import quant -from mindspore.ops import functional as F -from ..common import QuantDtype -from .quantizer import Quantizer, OptimizeOption -from .quant_utils import compute_kl_threshold - -__all__ = ["QuantizationAwareTraining", "create_quant_config"] - - -def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver), - quant_delay=(0, 0), - quant_dtype=(QuantDtype.INT8, QuantDtype.INT8), - per_channel=(False, False), - symmetric=(False, False), - narrow_range=(False, False), - mode="DEFAULT"): - r""" - Config the observer type of weights and data flow with quant parameters. - - Args: - quant_observer (Union[Observer, list, tuple]): The types of observer for quantization. The first element - applies to weights and the second applies to data flow. Currently, only - :class:`FakeQuantWithMinMaxObserver` supported. - Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver). - quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized - during train and eval. The first element represents weights and the second element represents data flow. - Default: (0, 0). - quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first - element represents weights and the second element represents data flow. - Default: (QuantDtype.INT8, QuantDtype.INT8). - per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` - then base on per channel, otherwise base on per layer. The first element represents weights - and the second element represents data flow, and the second element must be `False` now. - Default: (False, False). - symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then - base on symmetric, otherwise base on asymmetric. The first element represents weights and the second - element represents data flow. Default: (False, False). - narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. - The first element represents weights and the second element represents data flow. - Default: (False, False). - mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported. - Default: "DEFAULT". - - Returns: - QuantConfig, contains the observer type of weight and activation. - - Raises: - ValueError: If the second element of `per_channel` is not `False`. - """ - if per_channel[-1]: - raise ValueError("Arg 'per_channel' second element must be 'False'.") - weight_observer = quant_observer[0].partial_init(quant_delay=quant_delay[0], quant_dtype=quant_dtype[0], - per_channel=per_channel[0], symmetric=symmetric[0], - narrow_range=narrow_range[0], mode=mode) - act_observer = quant_observer[-1].partial_init(quant_delay=quant_delay[-1], quant_dtype=quant_dtype[-1], - per_channel=per_channel[-1], symmetric=symmetric[-1], - narrow_range=narrow_range[-1], mode=mode) - return quant.QuantConfig(weight=weight_observer, activation=act_observer) - - -class _AddFakeQuantInput(nn.Cell): - """ - Add FakeQuant OP at input of the network. Only support one input case. 
- """ - - def __init__(self, network, quant_delay=0): - super(_AddFakeQuantInput, self).__init__(auto_prefix=False) - self.fake_quant_input = quant.FakeQuantWithMinMaxObserver(min_init=-6, max_init=6, - quant_delay=quant_delay, ema=True) - self.fake_quant_input.update_parameters_name('fake_quant_input.') - self.network = network - - def construct(self, data): - data = self.fake_quant_input(data) - output = self.network(data) - return output - - -class _AddFakeQuantAfterSubCell(nn.Cell): - """ - Add FakeQuant OP after of the sub Cell. - """ - - def __init__(self, subcell, **kwargs): - super(_AddFakeQuantAfterSubCell, self).__init__(auto_prefix=False) - self.subcell = subcell - self.mode = "DEFAULT" - self.max_init = 6 - self.min_init = -6 - - if kwargs.get("optimize_option") is not None and OptimizeOption.LEARNED_SCALE in kwargs["optimize_option"]: - self.mode = "LEARNED_SCALE" - self.max_init = 16 - self.min_init = -16 - - self.fake_quant_act = quant.FakeQuantWithMinMaxObserver(min_init=self.min_init, - max_init=self.max_init, - ema=True, - quant_dtype=kwargs.get("quant_dtype"), - quant_delay=kwargs.get("quant_delay"), - per_channel=kwargs.get("per_channel"), - symmetric=kwargs.get("symmetric"), - narrow_range=kwargs.get("narrow_range"), - mode=self.mode) - - def construct(self, *data): - output = self.subcell(*data) - output = self.fake_quant_act(output) - return output - - -class QuantizationAwareTraining(Quantizer): - r""" - Quantizer for quantization aware training. - - Args: - bn_fold (bool): Whether to use bn fold ops for simulation inference operation. Default: True. - freeze_bn (int): Number of steps after which BatchNorm OP parameters fixed to global mean and variance. - Default: 1e7. - quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized - during train and eval. The first element represents weights and the second element represents data flow. - Default: (0, 0). - quant_dtype (Union[QuantDtype, list, tuple]): Datatype used to quantize weights and activations. The first - element represents weights and the second element represents data flow. It is necessary to consider the - precision support of hardware devices in the practical quantization infer scenario. - Default: (QuantDtype.INT8, QuantDtype.INT8). - per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True` - then base on per channel, otherwise base on per layer. The first element represents weights and the - second element represents data flow, and the second element must be `False` now. Default: (False, False). - symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then - base on symmetric, otherwise base on asymmetric. The first element represents weights and the second - element represents data flow. Default: (False, False). - narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not. - The first element represents weights and the second element represents data flow. - Default: (False, False). - optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently - only support `QAT` and `LEARNED_SCALE` (Note that, if both `QAT` and `LEARNED_SCALE` are configured, - `LEARNED_SCALE` has a higher priority. 
`LEARNED_SCALE` currently only work under some constraints, which - includes: freeze_bn=0, quant_delay=0, symmetric=True, narrow_range=True, More specifically, for operators - such as Relu and Relu6, which only have positive values, we add a negative truncation to optimize this - scenario, and narrow_range will automatically match to False). Default: OptimizeOption.QAT. - one_conv_fold (bool): Whether to use one conv bn fold ops for simulation inference operation. Default: True. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Raises: - TypeError: If the element of `quant_delay` or `freeze_bn` is not int. - TypeError: If `bn_fold`, `one_conv_fold` or the element of `per_channel`, `symmetric`, `narrow_range` - is not bool. - TypeError: If the element of `quant_dtype` is not `QuantDtype`. - ValueError: If the length of `quant_delay`, `quant_dtype`, `per_channel`, `symmetric` or `narrow_range` is - not less than 2. - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `freeze_bn` is not equal to 0. - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `symmetric` is not (True, True). - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `narrow_range` is not (True, True). - ValueError: If the `optimize_option` is `LEARNED_SCALE` and `quant_delay` is not (0, 0). - - Examples: - >>> from mindspore.compression.quant import QuantizationAwareTraining - >>> from mindspore import nn - >>> class LeNet5(nn.Cell): - ... def __init__(self, num_class=10, channel=1): - ... super(LeNet5, self).__init__() - ... self.type = "fusion" - ... self.num_class = num_class - ... - ... # change `nn.Conv2d` to `nn.Conv2dBnAct` - ... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - ... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - ... # change `nn.Dense` to `nn.DenseBnAct` - ... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - ... self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - ... self.fc3 = nn.DenseBnAct(84, self.num_class) - ... - ... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - ... self.flatten = nn.Flatten() - ... - ... def construct(self, x): - ... x = self.conv1(x) - ... x = self.max_pool2d(x) - ... x = self.conv2(x) - ... x = self.max_pool2d(x) - ... x = self.flatten(x) - ... x = self.fc1(x) - ... x = self.fc2(x) - ... x = self.fc3(x) - ... return x - ... 
- >>> net = LeNet5() - >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False]) - >>> net_qat = quantizer.quantize(net) - """ - __quant_op_name = ["Add", "Sub", "Mul", "RealDiv", "ReduceMean"] - - def __init__(self, - bn_fold=True, - freeze_bn=10000000, - quant_delay=(0, 0), - quant_dtype=(QuantDtype.INT8, QuantDtype.INT8), - per_channel=(False, False), - symmetric=(False, False), - narrow_range=(False, False), - optimize_option=OptimizeOption.QAT, - one_conv_fold=True): - """Init for QuantizationAwareTraining quantizer""" - super(QuantizationAwareTraining, self).__init__(optimize_option=optimize_option) - - def convert2list(name, value): - if not isinstance(value, list) and not isinstance(value, tuple): - value = [value] - elif len(value) > 2: - raise ValueError("input `{}` len should less then 2".format(name)) - return value - - quant_delay_list = convert2list("quant delay", quant_delay) - quant_dtype_list = convert2list("quant dtype", quant_dtype) - per_channel_list = convert2list("per channel", per_channel) - symmetric_list = convert2list("symmetric", symmetric) - narrow_range_list = convert2list("narrow range", narrow_range) - - self.weight_qdelay = Validator.check_non_negative_int(quant_delay_list[0], "quant delay") - self.act_qdelay = Validator.check_int(quant_delay_list[-1], 0, Rel.GE, "quant delay") - self.bn_fold = Validator.check_bool(bn_fold, "bn fold") - self.freeze_bn = Validator.check_non_negative_int(freeze_bn, "freeze bn") - self.weight_dtype = Validator.check_isinstance("weights dtype", quant_dtype_list[0], QuantDtype) - self.act_dtype = Validator.check_isinstance("activations dtype", quant_dtype_list[-1], QuantDtype) - self.weight_channel = Validator.check_bool(per_channel_list[0], "per channel") - self.act_channel = Validator.check_bool(per_channel_list[-1], "per channel") - self.weight_symmetric = Validator.check_bool(symmetric_list[0], "symmetric") - self.act_symmetric = Validator.check_bool(symmetric_list[-1], "symmetric") - self.weight_range = Validator.check_bool(narrow_range_list[0], "narrow range") - self.act_range = Validator.check_bool(narrow_range_list[-1], "narrow range") - self.one_conv_fold = Validator.check_bool(one_conv_fold, "one conv fold") - self._convert_method_map = {nn.Conv2dBnAct: self._convert_conv, - nn.DenseBnAct: self._convert_dense} - self.mode = "DEFAULT" - if OptimizeOption.LEARNED_SCALE in self.optimize_option: - self.mode = "LEARNED_SCALE" - if not self.weight_symmetric or not self.act_symmetric: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support " - "symmetric=(True, True) for quant") - if not self.weight_range or not self.act_range: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support narrow_range=(True, True) " - "for quant") - if self.freeze_bn != 0: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support freeze_bn equal to 0, " - "but get freeze_bn={}".format(self.freeze_bn)) - if self.weight_qdelay != 0 or self.act_qdelay != 0: - raise ValueError("OptimizeOption.LEARNED_SCALE currently only support quant_delay=(0, 0)") - self.quant_config = create_quant_config(quant_delay=quant_delay_list, - quant_dtype=quant_dtype_list, - per_channel=per_channel_list, - symmetric=symmetric_list, - narrow_range=narrow_range_list, - mode=self.mode) - self.eps = 1e-5 - - @staticmethod - def _convert_op_name(name): - pattern = re.compile(r'([A-Z]{1})') - name_new = re.sub(pattern, r'_\1', name).lower() - if name_new[0] == '_': - name_new = 
name_new[1:] - return name_new - - def quantize(self, network): - """ - Quant API to convert input network to a quantization aware training network. - - Note: - Please refer to the Examples of class: `mindspore.compression.quant.QuantizationAwareTraining`. - - Args: - network (Cell): network to be quantized. - - Returns: - Cell, a quantization aware training network. - - Raises: - KeyError: If the `device_target` set in context is not in `support_device`. - """ - - logger.warning("The compression module is deprecated and may not be supported in later version, please use " - "MindSpore Golden Stick(https://gitee.com/mindspore/golden-stick) instead.") - support_device = ["Ascend", "GPU"] - if context.get_context('device_target') not in support_device: - raise KeyError("Unsupported {} device target.".format(context.get_context('device_target'))) - - if OptimizeOption.QAT in self.optimize_option or OptimizeOption.LEARNED_SCALE in self.optimize_option: - network.update_cell_prefix() - network = self._convert_subcells2quant(network) - network.update_cell_type("quant") - return network - - def _convert_subcells2quant(self, network): - """ - convert sub cell like `Conv2dBnAct` and `DenseBnAct` to quant cell - """ - cells = network.name_cells() - change = False - for name in cells: - subcell = cells[name] - if subcell == network: - continue - if isinstance(subcell, (nn.Conv2dBnAct, nn.DenseBnAct)): - prefix = subcell.param_prefix - new_subcell = self._convert_method_map[type(subcell)](subcell) - new_subcell.update_parameters_name(prefix + '.') - network.insert_child_to_cell(name, new_subcell) - change = True - else: - self._convert_subcells2quant(subcell) - if isinstance(network, nn.SequentialCell) and change: - network.cell_list = list(network.cells()) - - # add FakeQuant OP after OP in white list, but not including those wrapped in the below quantization cell. 
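The `_convert_op_name` helper above only snake-cases a CamelCase operator name; a worked example of the same transformation:

import re

def convert_op_name(name):
    # mirror of the removed helper: prefix every capital with '_', lower-case, strip a leading '_'
    name_new = re.sub(r'([A-Z]{1})', r'_\1', name).lower()
    return name_new[1:] if name_new.startswith('_') else name_new

print(convert_op_name("ReduceMean"))  # reduce_mean
print(convert_op_name("Add"))         # add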
- if isinstance(network, (nn.FakeQuantWithMinMaxObserver, - nn.Conv2dBnFoldQuantOneConv, - nn.Conv2dBnFoldQuant, - nn.Conv2dBnWithoutFoldQuant, - nn.Conv2dQuant, - nn.DenseQuant, - nn.ActQuant, - nn.TensorAddQuant, - nn.MulQuant)): - return network - - add_list = [] - for name in network.__dict__: - if name[0] == '_': - continue - attr = network.__dict__[name] - if isinstance(attr, ops.Primitive) and attr.name in self.__quant_op_name: - add_list.append((name, attr)) - for name, prim_op in add_list: - prefix = name - add_quant = _AddFakeQuantAfterSubCell(prim_op, - quant_dtype=self.act_dtype, - quant_delay=self.act_qdelay, - per_channel=self.act_channel, - symmetric=self.act_symmetric, - narrow_range=self.act_range, - optimize_option=self.optimize_option) - if network.param_prefix: - prefix = '.'.join([network.param_prefix, prefix]) - add_quant.update_parameters_name(prefix + '.') - del network.__dict__[name] - network.insert_child_to_cell(name, add_quant) - return network - - def _convert_conv(self, subcell): - """ - convert Conv2d cell to quant cell - """ - min_init = -6 - max_init = 6 - if self.eps == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - if OptimizeOption.LEARNED_SCALE in self.optimize_option: - subcell_weight_para = subcell.conv.weight.data.asnumpy() - if subcell.has_bn: - scale_factor = (subcell.batchnorm.gamma.data.asnumpy() / - np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype) - self.quant_config = self.quant_config._replace( - weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init)) - - conv_inner = subcell.conv - if subcell.has_bn: - bn_inner = subcell.batchnorm - if self.bn_fold: - if self.one_conv_fold: - conv_inner = quant.Conv2dBnFoldQuantOneConv(conv_inner.in_channels, - conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, - stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, - padding=conv_inner.padding, - dilation=conv_inner.dilation, - group=conv_inner.group, - eps=bn_inner.eps, - momentum=1 - bn_inner.momentum, - has_bias=conv_inner.has_bias, - bias_init=conv_inner.bias_init, - quant_config=self.quant_config, - quant_dtype=self.weight_dtype, - fake=True) - else: - conv_inner = quant.Conv2dBnFoldQuant(conv_inner.in_channels, - conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, - stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, - padding=conv_inner.padding, - dilation=conv_inner.dilation, - group=conv_inner.group, - eps=bn_inner.eps, - momentum=1 - bn_inner.momentum, - has_bias=conv_inner.has_bias, - bias_init=conv_inner.bias_init, - freeze_bn=self.freeze_bn, - quant_config=self.quant_config, - quant_dtype=self.weight_dtype, - fake=True) - # change original network Batch Normalization OP parameters to quant network - conv_inner.gamma = subcell.batchnorm.gamma - conv_inner.beta = subcell.batchnorm.beta - conv_inner.moving_mean = subcell.batchnorm.moving_mean - conv_inner.moving_variance = subcell.batchnorm.moving_variance - else: - conv_inner = quant.Conv2dBnWithoutFoldQuant(conv_inner.in_channels, - conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, - stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, - padding=conv_inner.padding, - dilation=conv_inner.dilation, - group=conv_inner.group, - eps=bn_inner.eps, - momentum=1 - bn_inner.momentum, - has_bias=conv_inner.has_bias, - 
bias_init=conv_inner.bias_init, - quant_config=self.quant_config) - # change original network Batch Normalization OP parameters to quant network - conv_inner.batchnorm.gamma = subcell.batchnorm.gamma - conv_inner.batchnorm.beta = subcell.batchnorm.beta - conv_inner.batchnorm.moving_mean = subcell.batchnorm.moving_mean - conv_inner.batchnorm.moving_variance = subcell.batchnorm.moving_variance - del subcell.batchnorm - subcell.batchnorm = None - subcell.has_bn = False - else: - conv_inner = quant.Conv2dQuant(conv_inner.in_channels, conv_inner.out_channels, - kernel_size=conv_inner.kernel_size, stride=conv_inner.stride, - pad_mode=conv_inner.pad_mode, padding=conv_inner.padding, - dilation=conv_inner.dilation, group=conv_inner.group, - has_bias=conv_inner.has_bias, quant_config=self.quant_config, - quant_dtype=self.weight_dtype) - # change original network Conv2D OP parameters to quant network - conv_inner.weight = subcell.conv.weight - if subcell.conv.has_bias: - conv_inner.bias = subcell.conv.bias - subcell.conv = conv_inner - if subcell.has_act and subcell.activation is not None: - subcell.activation = self._convert_activation(subcell.activation) - elif subcell.after_fake: - subcell.has_act = True - subcell.activation = _AddFakeQuantAfterSubCell(F.identity, quant_dtype=self.act_dtype, - quant_delay=self.act_qdelay, per_channel=self.act_channel, - symmetric=self.act_symmetric, narrow_range=self.act_range, - optimize_option=self.optimize_option) - return subcell - - def _convert_dense(self, subcell): - """ - convert dense cell to quant cell - """ - min_init = -6 - max_init = 6 - if self.eps == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - if OptimizeOption.LEARNED_SCALE in self.optimize_option: - subcell_weight_para = subcell.dense.weight.data.asnumpy() - if subcell.has_bn: - scale_factor = (subcell.batchnorm.gamma.data.asnumpy() / - np.sqrt(subcell.batchnorm.moving_variance.data.asnumpy() + self.eps)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, self.weight_dtype) - self.quant_config = self.quant_config._replace( - weight=self.quant_config.weight.partial_init(min_init=min_init, max_init=max_init)) - - dense_inner = subcell.dense - dense_inner = quant.DenseQuant(dense_inner.in_channels, - dense_inner.out_channels, - has_bias=dense_inner.has_bias, - quant_config=self.quant_config, - quant_dtype=self.weight_dtype) - # change original network Dense OP parameters to quant network - dense_inner.weight = subcell.dense.weight - if subcell.dense.has_bias: - dense_inner.bias = subcell.dense.bias - subcell.dense = dense_inner - if subcell.has_act and subcell.activation is not None: - subcell.activation = self._convert_activation(subcell.activation) - elif subcell.after_fake: - subcell.has_act = True - subcell.activation = _AddFakeQuantAfterSubCell(F.identity, - quant_dtype=self.act_dtype, - quant_delay=self.act_qdelay, - per_channel=self.act_channel, - symmetric=self.act_symmetric, - narrow_range=self.act_range, - optimize_option=self.optimize_option) - return subcell - - def _convert_activation(self, activation): - """ - convert activation cell to quant cell - """ - act_class = activation.__class__ - act_list = [nn.ReLU, nn.ReLU6, nn.Sigmoid] - act_list_with_fake_before = [nn.LeakyReLU, nn.HSigmoid, nn.HSwish] - - if act_class in act_list: - return quant.ActQuant(activation=activation, - quant_config=self.quant_config, - quant_dtype=self.act_dtype) - if act_class in 
act_list_with_fake_before: - return quant.ActQuant(activation=activation, - ema=True, - fake_before=True, - quant_config=self.quant_config, - quant_dtype=self.act_dtype) - raise ValueError("Unsupported activation in auto quant: ", act_class) - - def _kl_init(self, subcell_weight_para, weight_dtype): - """ - Calculate the value of max_init and min_init with compute_kl_threshold. - """ - if self.weight_channel: - max_init = [compute_kl_threshold(weight_para_each, weight_dtype) - for weight_para_each in subcell_weight_para] - min_init = [-x for x in max_init] - else: - max_init = [compute_kl_threshold(subcell_weight_para, weight_dtype)] - min_init = [-x for x in max_init] - return min_init, max_init - - def _set_mixed_bits(self, network, strategy): - r""" - Set network's quantization strategy, this function is currently only valid for `LEARNED_SCALE` - optimize_option. - - Args: - network (Cell): Input network. - strategy (list): The quantization strategy for layers that need to be quantified (eg. [[8], [8], - ..., [6], [4], [8]]), currently only the quant_dtype for weights of the dense layer and the - convolution layer is supported. - - Returns: - Cell, a network with mixed bit strategy configured. - - Raises: - ValueError: If `OptimizeOption.LEARNED_SCALE` is not in `self.optimize_option`. - """ - if OptimizeOption.LEARNED_SCALE not in self.optimize_option: - raise ValueError("The `_set_mixed_bits` function is currently only valid for `LEARNED_SCALE` " - "optimize_option.") - - quantizable_idx = [] - pass_cell = None - for i, cell_and_name in enumerate(network.cells_and_names()): - cell = cell_and_name[1] - if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)) and cell is not pass_cell: - quantizable_idx.append(i) - - if len(quantizable_idx) != len(strategy): - raise ValueError("The dimension of quantifiable layers is not consistent with that of strategy.") - - quantizable_layer_bit_dict = {idx: bit for idx, bit in zip(quantizable_idx, strategy)} - type_map = { - QuantDtype.INT2.num_bits: QuantDtype.INT2, - QuantDtype.INT3.num_bits: QuantDtype.INT3, - QuantDtype.INT4.num_bits: QuantDtype.INT4, - QuantDtype.INT5.num_bits: QuantDtype.INT5, - QuantDtype.INT6.num_bits: QuantDtype.INT6, - QuantDtype.INT7.num_bits: QuantDtype.INT7, - QuantDtype.INT8.num_bits: QuantDtype.INT8 - } - if self.eps == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - for i, cell_and_name in enumerate(network.cells_and_names()): - cell = cell_and_name[1] - if i not in quantizable_idx: - continue - if isinstance(cell, (nn.Conv2dBnAct, nn.DenseBnAct)): - cell.weight_dtype = type_map.get(quantizable_layer_bit_dict[i][0]) - if cell.weight_dtype is None: - raise ValueError("Input strategy is invalid: ", quantizable_layer_bit_dict[i][0]) - if isinstance(cell, nn.Conv2dBnAct): - subcell_weight_para = cell.conv.weight.data.asnumpy() - if hasattr(cell.conv, 'gamma'): - scale_factor = (cell.conv.gamma.data.asnumpy() / - np.sqrt(cell.conv.moving_variance.data.asnumpy() + self.eps)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype) - cell.conv.fake_quant_weight.reset(quant_dtype=cell.weight_dtype, - min_init=min_init, - max_init=max_init) - elif isinstance(cell, nn.DenseBnAct): - subcell_weight_para = cell.dense.weight.data.asnumpy() - if hasattr(cell.dense, 'gamma'): - scale_factor = (cell.dense.gamma.data.asnumpy() / - np.sqrt(cell.dense.moving_variance.data.asnumpy() + self.eps)) - 
subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - min_init, max_init = self._kl_init(subcell_weight_para, cell.weight_dtype) - cell.dense.fake_quant_weight.reset(quant_dtype=cell.weight_dtype, - min_init=min_init, - max_init=max_init) - return network diff --git a/mindspore/python/mindspore/compression/quant/quant_utils.py b/mindspore/python/mindspore/compression/quant/quant_utils.py deleted file mode 100644 index cdd1980b182..00000000000 --- a/mindspore/python/mindspore/compression/quant/quant_utils.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Quantization utils. - -Note: This is an experimental interface that is subject to change and/or deletion. -""" - -from __future__ import absolute_import -import numpy as np -from mindspore._checkparam import Validator -from mindspore import log as logger -from ... import nn - -__all__ = ["load_nonquant_param_into_quant_net", "query_quant_layers"] - - -def cal_quantization_params(input_min, - input_max, - quant_min, - quant_max, - data_type, - symmetric=False): - r""" - Calculate quantization params for scale and zero point. - - Args: - input_min (numpy.ndarray): The dimension of channel or 1. - input_max (numpy.ndarray): The dimension of channel or 1. - quant_min (int): The minimum quantization integer. - quant_max (int): The maximum quantization integer. - data_type (numpy type) : Can be numpy int8, numpy uint8. - symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - - Returns: - scale (numpy.ndarray): quantization param. - zero point (numpy.ndarray): quantization param. 
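The implementation that follows derives the affine quantization parameters in the usual way: the scale from the floating-point dynamic range, the zero point by shifting `quant_min`. A self-contained numpy sketch of the asymmetric int8 case, with illustrative numbers (function name is not part of the removed API):

import numpy as np

def scale_and_zp(input_min, input_max, quant_min, quant_max):
    """Asymmetric variant of the removed cal_quantization_params helper (illustrative)."""
    input_min = np.minimum(0.0, input_min)   # the representable range must contain zero
    input_max = np.maximum(0.0, input_max)
    scale = (input_max - input_min) / (quant_max - quant_min)
    zp = np.floor(quant_min - input_min / scale + 0.5)
    return scale, zp

scale, zp = scale_and_zp(np.array(-1.0), np.array(3.0), -128, 127)
# scale is about 4 / 255 = 0.0157, zp is -64, and a real value is recovered as (q - zp) * scale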
- """ - if quant_min == quant_max: - raise ValueError("quant_max is equal to quant_min which will lead to divide zero error.") - - input_max = np.maximum(0.0, input_max) - input_min = np.minimum(0.0, input_min) - - if input_min.shape != input_max.shape: - raise ValueError("input min shape should be equal to input max.") - if len(input_min.shape) > 1: - raise ValueError("input min and max shape should be one dim.") - if (input_min > input_max).all(): - raise ValueError("input_min min should be less than input max.") - if (input_max == input_min).all(): - return np.ones(input_min.shape), np.zeros(input_min.shape) - - # calculate scale - if symmetric: - input_max = np.maximum(-input_min, input_max) - input_min = -input_max - scale = (input_max - input_min) / (quant_max - quant_min) - - # calculate zero point - if data_type == np.int8 and symmetric: - zp = np.zeros(input_min.shape) - else: - if scale == 0.0: - raise ValueError("scale can not be 0.") - zp_double = quant_min - input_min / scale - zp = np.floor(zp_double + 0.5) - - return scale, zp - - -def get_quant_min_max(data_type, num_bits=8, narrow_range=False): - """Calculate quantization params for minimum/maximum quantization integer""" - if data_type == np.int8: - quant_min = 0 - 2 ** (num_bits - 1) - quant_max = 2 ** (num_bits - 1) - 1 - elif data_type == np.uint8: - quant_min = 0 - quant_max = 2 ** num_bits - 1 - else: - raise ValueError("Unsupported datatype({})".format(data_type)) - if narrow_range: - quant_min = quant_min + 1 - return quant_min, quant_max - - -def weight2int(data, scale, zero_point, quant_min, quant_max): - r""" - Calculate int8/uint8 weight from fp32. the formula is defined as: - - .. math:: - int8/uint8 = round(float/scale) + offset - - Args: - data (numpy.ndarray): The dimension of channel or 1. Should be NCHW. - scale (numpy.ndarray): The dimension of channel or 1. - zero_point (numpy.ndarray): The dimension of channel or 1. - quant_min (int): The minimum quantization integer. - quant_max (int): The maximum quantization integer. - - Returns: - weight (numpy.ndarray): The dimension of channel or 1. 
- """ - if scale.shape != zero_point.shape: - raise ValueError("`scale` and `zero_point` should have the same shape.") - if scale.shape[0] < 0: - raise ValueError("`scale` and `zero_point` shape should be greater than zero.") - if 0 in scale: - raise ValueError("Zero exist in `scale` which will lead to divide zero error.") - if len(scale.shape) >= 1 and scale.shape[0] > 1: - # for perchannel - if scale.shape[0] == data.shape[0]: - # `Conv2d` or `Dense` op weight - shape_list = [-1] + [1] * len(data.shape[1:]) - scale = scale.reshape(shape_list) - zero_point = zero_point.reshape(shape_list) - elif scale.shape[0] == data.shape[1]: - # `DepthwiseConv2d` op weight - shape_list = [1, -1] + [1] * len(data.shape[2:]) - scale = scale.reshape(shape_list) - zero_point = zero_point.reshape(shape_list) - else: - raise ValueError("Unsupported weight shape({})".format(data.shape)) - - weight_int = np.round((data / scale) + zero_point) - weight_int[weight_int > quant_max] = quant_max - weight_int[weight_int < quant_min] = quant_min - return weight_int - - -def scale_zp_max_min_from_fake_quant_cell(cell, data_type): - """Get calculate quantization params for scale, zero point, max and min from `FakeQuantWithMinMaxObserver`.""" - minq = cell.minq.data.asnumpy() - maxq = cell.maxq.data.asnumpy() - # make sure maxq > 0 and minq <= 0 - if cell.mode == 'LEARNED_SCALE': - maxq = np.abs(maxq) - minq = -np.abs(minq) - quant_min, quant_max = get_quant_min_max(data_type, num_bits=cell.num_bits, narrow_range=cell.narrow_range) - symmetric = cell.symmetric and not cell.neg_trunc - scale, zp = cal_quantization_params( - minq, maxq, - quant_min, quant_max, data_type, - symmetric=symmetric) - return scale, zp, maxq, minq - - -def fold_batchnorm(weight, cell_quant): - r""" - Fold the batchnorm in `Conv2dBnFoldQuant` to weight. - - Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive. - - Args: - weight (numpy.ndarray): Weight of `cell_quant`. - cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnFoldQuant`. - - Returns: - weight (numpy.ndarray): Folded weight. - bias (numpy.ndarray): Folded bias. - """ - variance = cell_quant.moving_variance.data.asnumpy() - mean = cell_quant.moving_mean.data.asnumpy() - gamma = cell_quant.gamma.data.asnumpy() - beta = cell_quant.beta.data.asnumpy() - epsilon = cell_quant.eps - if epsilon == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - sigma = np.sqrt(variance + epsilon) - - if gamma.shape[0] == weight.shape[0]: - # `Conv2d` or `Dense` op weight - shape_list = [-1] + [1] * len(weight.shape[1:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - elif gamma.shape[0] == weight.shape[1]: - # `DepthwiseConv2d` op weight - shape_list = [1, -1] + [1] * len(weight.shape[2:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - else: - raise ValueError("Unsupported weight shape({})".format(weight.shape)) - - weight = weight * _gamma / _sigma - bias = beta - gamma * mean / sigma - return weight, bias - - -def without_fold_batchnorm(weight, cell_quant): - r""" - Fold the batchnorm in `Conv2dBnWithoutFoldQuant` to weight. - - Calculate from `FakeQuantWithMinMax`'s Parameter or Fake quant primitive. - - Args: - weight (numpy.ndarray): Weight of `cell_quant`. - cell_quant (Cell): Object of `mindspore.nn.layer.Conv2dBnWithoutFoldQuant`. - - Returns: - weight (numpy.ndarray): whihout folded weight. - bias (numpy.ndarray): without folded bias. 
- """ - variance = cell_quant.batchnorm.moving_variance.data.asnumpy() - mean = cell_quant.batchnorm.moving_mean.data.asnumpy() - gamma = cell_quant.batchnorm.gamma.data.asnumpy() - beta = cell_quant.batchnorm.beta.data.asnumpy() - epsilon = cell_quant.batchnorm.eps - if epsilon == 0: - raise ValueError("`epsilon` is zero may lead to divide zero error") - sigma = np.sqrt(variance + epsilon) - - if gamma.shape[0] == weight.shape[0]: - # `Conv2d` or `Dense` op weight - shape_list = [-1] + [1] * len(weight.shape[1:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - elif gamma.shape[0] == weight.shape[1]: - # `DepthwiseConv2d` op weight - shape_list = [1, -1] + [1] * len(weight.shape[2:]) - _gamma = gamma.reshape(shape_list) - _sigma = sigma.reshape(shape_list) - else: - raise ValueError("Unsupported weight shape({})".format(weight.shape)) - - weight = weight * _gamma / _sigma - bias = beta - gamma * mean / sigma - return weight, bias - - -def compute_kl_threshold(data, bitwidth): - r""" - Using KL-J Distance to calculate the clip threshold. - - Args: - - **data** (NumpyArray) - Data observed to calculate the threshold for quantization, - - **bitwidth** (QuantDtype) - The datatype of quantization. - Outputs: - Tensor with Shape 1. Threshold to calculate the data. - """ - data_max = np.abs(data).max() - if data_max < 1e-5: - return 1e-5 - hist, bin_edges = np.histogram(np.abs(data), bins='sqrt', range=(0, data_max), density=True) - # For the sake of high efficiency, we limit the maximum number of bins to 1024 in `sqrt` mode, If it exceeds the - # largest size, turn to use the default bins config. - largest_bin_size = 1024 - if hist.shape[0] > largest_bin_size: - hist, bin_edges = np.histogram(np.abs(data), range=(0, data_max), density=True) - sum_ = np.sum(hist) - if sum_ == 0: - hist = 0 - else: - hist = hist / sum_ - cumsum = np.cumsum(hist) - bit_pow_range = pow(2, int(bitwidth.num_bits) - 1) - threshold = [] - scaling_factor = [] - kl = [] - if bit_pow_range + 1 > len(bin_edges) - 1: - th_layer_out = bin_edges[-1] - return float(th_layer_out) - for i in range(bit_pow_range + 1, len(bin_edges), 1): - threshold_tmp = (i + 0.5) * (bin_edges[1] - bin_edges[0]) - threshold = np.concatenate((threshold, [threshold_tmp])) - scaling_factor_tmp = threshold_tmp / (bit_pow_range - 1) - scaling_factor = np.concatenate((scaling_factor, [scaling_factor_tmp])) - # forward interpolation - cumsum_tmp = np.copy(cumsum) - cumsum_tmp[(i - 1):] = 1 - fwd_x = np.linspace(0.0, 1.0, bit_pow_range) - fwd_xp = np.linspace(0.0, 1.0, i) - fwd_fp = cumsum_tmp[:i] - forward_interp = np.interp(fwd_x, fwd_xp, fwd_fp) - # backward interpolation - bwd_x = np.linspace(0.0, 1.0, i) - bwd_xp = np.linspace(0.0, 1.0, bit_pow_range) - bwd_fp = forward_interp - backward_interp = np.interp(bwd_x, bwd_xp, bwd_fp) - cumsum_tmp[:i] = backward_interp - if 0 in cumsum_tmp: - raise ValueError("Zero exist in `cumsum_tmp` which will lead to divide zero error") - kl_tmp = np.sum((cumsum - cumsum_tmp) * np.log2(cumsum / cumsum_tmp)) # Kullback-Leibler-J - kl = np.concatenate((kl, [kl_tmp])) - th_layer_out = threshold[np.argmin(kl)] - threshold = float(th_layer_out) - if threshold < 1e-5: - threshold = 1e-5 - return threshold - - -def query_quant_layers(network): - r""" - Query the network's quantization strategy of each quantized layer and print it to the screen, note that all the - quantization layers are queried before graph compile optimization in the graph mode, thus, some redundant quantized - layers, which not 
exist in practical execution, may appear. - - Args: - network (Cell): input network - - Examples: - >>> from mindspore.compression.quant import QuantizationAwareTraining - >>> from mindspore.compression.quant.quant_utils import query_quant_layers - >>> class LeNet5(nn.Cell): - ... def __init__(self, num_class=10, channel=1): - ... super(LeNet5, self).__init__() - ... self.type = "fusion" - ... self.num_class = num_class - ... - ... # change `nn.Conv2d` to `nn.Conv2dBnAct` - ... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - ... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - ... # change `nn.Dense` to `nn.DenseBnAct` - ... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - ... self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - ... self.fc3 = nn.DenseBnAct(84, self.num_class) - ... - ... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - ... self.flatten = nn.Flatten() - ... - ... def construct(self, x): - ... x = self.conv1(x) - ... x = self.max_pool2d(x) - ... x = self.conv2(x) - ... x = self.max_pool2d(x) - ... x = self.flatten(x) - ... x = self.fc1(x) - ... x = self.fc2(x) - ... x = self.fc3(x) - ... return x - ... - >>> net = LeNet5() - >>> quantizer = QuantizationAwareTraining(bn_fold=False, per_channel=[True, False], symmetric=[True, False]) - >>> net_qat = quantizer.quantize(net) - >>> query_quant_layers(net_qat) - conv1.conv.fake_quant_weight INT8 - conv1.activation.fake_quant_act INT8 - conv2.conv.fake_quant_weight INT8 - conv2.activation.fake_quant_act INT8 - fc1.dense.fake_quant_weight INT8 - fc1.activation.fake_quant_act INT8 - fc2.dense.fake_quant_weight INT8 - fc2.activation.fake_quant_act INT8 - fc3.dense.fake_quant_weight INT8 - fc3.activation.fake_quant_act INT8 - """ - network = Validator.check_isinstance("network", network, nn.Cell) - tplt = "{0:60}\t{1:10}" - for cell_and_name in network.cells_and_names(): - cell_name = cell_and_name[0] - cell = cell_and_name[1] - if isinstance(cell, nn.FakeQuantWithMinMaxObserver): - logger.info(tplt.format(cell_name, cell.quant_dtype)) - - -def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_params=None): - r""" - Load fp32 model parameters into quantization model. - - Args: - quant_model(Cell): Quantization model. - params_dict(dict): Parameter dict that stores fp32 parameters. - quant_new_params(list): Parameters that exist in quantization network but not in non-quantization - network. Default: None. - - Raises: - TypeError: If `quant_new_params` is not None and is not list. - ValueError: If there are parameters in the `quant_model` that are neither in `params_dict` - nor in `quant_new_params`. - - Examples: - >>> import mindspore as ms - >>> from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net - >>> class LeNet5(nn.Cell): - ... def __init__(self, num_class=10, channel=1): - ... super(LeNet5, self).__init__() - ... self.type = "fusion" - ... self.num_class = num_class - ... - ... # change `nn.Conv2d` to `nn.Conv2dBnAct` - ... self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - ... self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - ... # change `nn.Dense` to `nn.DenseBnAct` - ... self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - ... self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - ... self.fc3 = nn.DenseBnAct(84, self.num_class) - ... - ... self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - ... 
self.flatten = nn.Flatten() - ... - ... def construct(self, x): - ... x = self.conv1(x) - ... x = self.max_pool2d(x) - ... x = self.conv2(x) - ... x = self.max_pool2d(x) - ... x = self.flatten(x) - ... x = self.fc1(x) - ... x = self.fc2(x) - ... x = self.fc3(x) - ... return x - ... - >>> net = LeNet5() - >>> ckpt_file_name = "./checkpoint/LeNet5_noquant-1_32.ckpt" - >>> param_dict = ms.load_checkpoint(ckpt_file_name) - >>> load_nonquant_param_into_quant_net(net, param_dict) - """ - if quant_new_params is not None and not isinstance(quant_new_params, list): - raise TypeError("quant_new_params must be list or None.") - iterable_dict = { - 'minq': iter(list(filter(lambda item: item[0].endswith('minq'), params_dict.items()))), - 'maxq': iter(list(filter(lambda item: item[0].endswith('maxq'), params_dict.items()))), - 'quant_max': iter(list(filter(lambda item: item[0].endswith('quant_max'), params_dict.items()))) - } - for param in params_dict.items(): - key_name = param[0].split(".")[-1] - if key_name not in iterable_dict: - iterable_dict[key_name] = iter(list(filter(lambda item, value=key_name: item[0].endswith(value), - params_dict.items()))) - - for name, param in quant_model.parameters_and_names(): - key_name = name.split(".")[-1] - if key_name not in iterable_dict.keys(): - if key_name not in quant_new_params: - raise ValueError(f"Can't find match parameter in ckpt, param name = {name}") - continue - value_param = next(iterable_dict[key_name], None) - if value_param: - param.set_data(value_param[1].data) - logger.info(f'init model param {name} with checkpoint param {value_param[0]}') - - # Perform KL_init when learned scale quantization is executed. - for cell_and_name in quant_model.cells_and_names(): - cell = cell_and_name[1] - if isinstance(cell, (nn.Conv2dBnFoldQuantOneConv, nn.Conv2dBnFoldQuant, nn.Conv2dBnWithoutFoldQuant, - nn.Conv2dQuant, nn.DenseQuant)) and cell.fake_quant_weight.mode == "LEARNED_SCALE": - subcell_weight_para = cell.weight.data.asnumpy() - if hasattr(cell, 'gamma'): - scale_factor = (cell.gamma.data.asnumpy() / - np.sqrt(cell.moving_variance.data.asnumpy() + 1e-5)) - subcell_weight_para = subcell_weight_para * scale_factor.reshape(-1, 1, 1, 1) - - if cell.fake_quant_weight.per_channel: - max_init = [compute_kl_threshold(weight_para_each, cell.fake_quant_weight.quant_dtype) - for weight_para_each in subcell_weight_para] - min_init = [-x for x in max_init] - else: - max_init = [compute_kl_threshold(subcell_weight_para, cell.fake_quant_weight.quant_dtype)] - min_init = [-x for x in max_init] - - cell.fake_quant_weight.reset(quant_dtype=cell.fake_quant_weight.quant_dtype, - min_init=min_init, max_init=max_init) diff --git a/mindspore/python/mindspore/compression/quant/quantizer.py b/mindspore/python/mindspore/compression/quant/quantizer.py deleted file mode 100644 index 571123a8d50..00000000000 --- a/mindspore/python/mindspore/compression/quant/quantizer.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Note: - Base Class of Quantizer. This is interface that is subject to change or deletion. -""" - -from __future__ import absolute_import -from abc import ABC, abstractmethod -from enum import Enum - -from mindspore._checkparam import Validator - -__all__ = ["OptimizeOption"] - - -class OptimizeOption(Enum): - r""" - An enum for the model quantization optimize option, currently only support `QAT` and `LEARNED_SCALE`. - """ - # using quantization aware training - QAT = "QAT" - - # using the learned scale quantization - LEARNED_SCALE = "LEARNED_SCALE" - - def __str__(self): - return str(self.value) - - -class Quantizer(ABC): - """ - Base class of Quantizer. You can implement different kind of quantizer to get different quantization result. - - Notes: - This class is an abstract class. - - Args: - optimize_option (OptimizeOption, list or tuple): Specifies the quant algorithm and options. Default: - OptimizeOption.QAT. - """ - def __init__(self, - optimize_option=OptimizeOption.QAT): - if not isinstance(optimize_option, list) and not isinstance(optimize_option, tuple): - optimize_option = [optimize_option] - for option in optimize_option: - option = Validator.check_isinstance("optimize_option", option, OptimizeOption) - self.optimize_option = optimize_option - - @abstractmethod - def quantize(self, network): - """ - Quant API to convert input network to a quantization aware training network - Args: - network (Cell): network to be quantized. - """ diff --git a/mindspore/python/mindspore/nn/layer/__init__.py b/mindspore/python/mindspore/nn/layer/__init__.py index 255339f6016..4ce3352d2b3 100644 --- a/mindspore/python/mindspore/nn/layer/__init__.py +++ b/mindspore/python/mindspore/nn/layer/__init__.py @@ -20,7 +20,7 @@ The high-level components(Cells) used to construct the neural network. from __future__ import absolute_import from mindspore.nn.layer import activation, normalization, container, conv, basic, embedding, pooling, \ - image, quant, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense + image, math, combined, timedistributed, thor_layer, rnns, rnn_cells, padding, dense from mindspore.nn.layer.activation import * from mindspore.nn.layer.normalization import * from mindspore.nn.layer.container import * @@ -32,7 +32,6 @@ from mindspore.nn.layer.basic import * from mindspore.nn.layer.embedding import * from mindspore.nn.layer.pooling import * from mindspore.nn.layer.image import * -from mindspore.nn.layer.quant import * from mindspore.nn.layer.math import * from mindspore.nn.layer.combined import * from mindspore.nn.layer.timedistributed import * @@ -53,7 +52,6 @@ __all__.extend(basic.__all__) __all__.extend(embedding.__all__) __all__.extend(pooling.__all__) __all__.extend(image.__all__) -__all__.extend(quant.__all__) __all__.extend(math.__all__) __all__.extend(combined.__all__) __all__.extend(timedistributed.__all__) diff --git a/mindspore/python/mindspore/nn/layer/quant.py b/mindspore/python/mindspore/nn/layer/quant.py deleted file mode 100644 index 8df53dccf62..00000000000 --- a/mindspore/python/mindspore/nn/layer/quant.py +++ /dev/null @@ -1,1868 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Quantization aware training.""" -from __future__ import absolute_import - -from functools import partial -from collections import namedtuple -import numpy as np - -import mindspore.common.dtype as mstype -from mindspore.ops.primitive import Primitive -from mindspore.ops import operations as P -from mindspore.common.parameter import Parameter -from mindspore.common.initializer import initializer -from mindspore.common.tensor import Tensor -from mindspore._checkparam import Validator, twice -from mindspore.compression.common import QuantDtype -import mindspore.context as context -from mindspore.nn.layer.normalization import BatchNorm2d -from mindspore.nn.layer.activation import get_activation -from mindspore.nn.cell import Cell -from mindspore import nn -from mindspore.ops.operations import _quant_ops as Q -from mindspore.nn.layer.combined import Conv2dBnAct -from mindspore.nn.layer.conv import Conv2d -from mindspore.nn.layer.basic import Dense - -__all__ = [ - 'FakeQuantWithMinMaxObserver', - 'Conv2dBnFoldQuantOneConv', - 'Conv2dBnFoldQuant', - 'Conv2dBnWithoutFoldQuant', - 'Conv2dQuant', - 'DenseQuant', - 'ActQuant', - 'TensorAddQuant', - 'MulQuant', -] - - -class BatchNormFoldCell(Cell): - """ - Batch Normalization folded. - - Args: - momentum (float): Momentum value must be [0, 1]. Default: 0.9. - epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in - float32 else 1e-3. Default: 1e-5. - freeze_bn (int): Delay in steps at which computation switches from regular batch - norm to frozen mean and std. Default: 0. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C, H, W)`. - - **mean** (Tensor) - Tensor of shape :math:`(C,)`. - - **variance** (Tensor) - Tensor of shape :math:`(C,)`. - - **global_step** (Tensor) - Tensor to record current global step. - - Outputs: - Tuple of 4 Tensor, the normalized input and the updated parameters. - - - **batch_mean** (Tensor) - Tensor of shape :math:`(C,)`. - - **batch_std** (Tensor) - Tensor of shape :math:`(C,)`. - - **running_mean** (Tensor) - Tensor of shape :math:`(C,)`. - - **running_std** (Tensor) - Tensor of shape :math:`(C,)`. 
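The fake-quant cells this file exports (see `__all__` above) simulate quantization entirely in floating point: the input is scaled, rounded, clamped to the integer range and scaled back, so training sees the same rounding error the deployed integer model will. A minimal per-layer numpy sketch of that round-trip, following the DEFAULT-mode formulas documented further down (EMA tracking of min/max is omitted; the function name is illustrative):

import numpy as np

def fake_quant(x, x_min, x_max, quant_min=-128, quant_max=127):
    """Per-layer quantize-dequantize in float; assumes x_min <= 0 <= x_max as the observer guarantees."""
    scale = (x_max - x_min) / (quant_max - quant_min)
    zp = np.round(np.clip(quant_min - x_min / scale, quant_min, quant_max))
    q = np.clip(np.round(x / scale + zp), quant_min, quant_max)
    return (q - zp) * scale   # same shape as x, now carrying the rounding error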
- """ - - def __init__(self, momentum=0.9, epsilon=1e-5, freeze_bn=0): - """Initialize batch norm fold layer""" - super(BatchNormFoldCell, self).__init__() - self.epsilon = epsilon - self.is_gpu = context.get_context('device_target') == "GPU" - if self.is_gpu: - self.bn_train = Q.BatchNormFold(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) - self.bn_infer = Q.BatchNormFold(momentum, epsilon, is_training=False, freeze_bn=freeze_bn) - else: - self.bn_reduce = P.BNTrainingReduce() - self.bn_update = Q.BatchNormFoldD(momentum, epsilon, is_training=True, freeze_bn=freeze_bn) - - def construct(self, x, mean, variance, global_step): - if self.is_gpu: - if self.training: - batch_mean, batch_std, running_mean, running_std = self.bn_train(x, mean, variance, global_step) - else: - batch_mean, batch_std, running_mean, running_std = self.bn_infer(x, mean, variance, global_step) - else: - if self.training: - x_sum, x_square_sum = self.bn_reduce(x) - _, batch_mean, batch_std, running_mean, running_std, mean_updated, variance_updated = \ - self.bn_update(x, x_sum, x_square_sum, mean, variance) - P.Assign()(mean, mean_updated) - P.Assign()(variance, variance_updated) - else: - batch_mean = P.ZerosLike()(variance) - batch_std = P.OnesLike()(variance) - running_mean = P.Add()(mean, 0.) - running_std = P.Sqrt()(P.Add()(variance, self.epsilon)) - return batch_mean, batch_std, running_mean, running_std - - -def _partial_init(cls_or_self, **kwargs): - """ - Wrapper that allows creation of class factories. - - This can be useful when there is a need to create classes with the same - constructor arguments, but different instances. - - Examples: - >>> class Foo: - ... def __init__(self, a, b, answer): - ... pass - >>> Foo.partial_init = classmethod(_partial_init) - >>> foo_builder = Foo.partial_init(a=3, b=4).partial_init(answer=42) - >>> foo_instance1 = foo_builder() - >>> foo_instance2 = foo_builder() - >>> result = (id(foo_instance1) == id(foo_instance2)) - >>> print(result) - False - """ - - class _PartialWrapper: - r""" - class of wrapper that allows creation of class factories. - """ - - partial_init = _partial_init - - def __init__(self, p): - self.p = p - - def __call__(self, *args, **keywords): - return self.p(*args, **keywords) - - def __repr__(self): - return self.p.__repr__() - - r = _PartialWrapper(partial(cls_or_self, **kwargs)) - return r - - -class _Observer(Cell): - """ - Base class of Observer. Observer is used to calculate the statistics of specific layer. - - Notes: - This class is an abstract class. - - Args: - quant_dtype (QuantDtype): The type of FakeQuant data. - """ - - partial_init = classmethod(_partial_init) - - def __init__(self, quant_dtype): - """Initialize _Observer.""" - super(_Observer, self).__init__() - self.quant_dtype = quant_dtype - - def extend_repr(self): - s = f"quant_dtype={self.quant_dtype}" - return s - - def construct(self): - pass - - -class UniformQuantObserver(_Observer): - """ - The base class of Uniform Quantization Observer. - - Args: - quant_dtype (QuantDtype): The type of FakeQuant data. Default: QuantDtype.INT8. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. - num_channels (int): declarate the min and max channel size, Default: 1. - - Returns: - Tensor. 
- """ - - min_max_map = { - QuantDtype.INT2: (-2, 1), - QuantDtype.INT3: (-4, 3), - QuantDtype.INT4: (-8, 7), - QuantDtype.INT5: (-16, 15), - QuantDtype.INT6: (-32, 31), - QuantDtype.INT7: (-64, 63), - QuantDtype.INT8: (-128, 127), - - QuantDtype.UINT2: (0, 3), - QuantDtype.UINT3: (0, 7), - QuantDtype.UINT4: (0, 15), - QuantDtype.UINT5: (0, 31), - QuantDtype.UINT6: (0, 63), - QuantDtype.UINT7: (0, 127), - QuantDtype.UINT8: (0, 255) - } - - def __init__(self, quant_dtype=QuantDtype.INT8, per_channel=False, symmetric=False, narrow_range=False, - num_channels=1): - """Initialize UniformQuantObserver.""" - super(UniformQuantObserver, self).__init__(quant_dtype) - self.per_channel = per_channel - self.symmetric = symmetric - self.narrow_range = narrow_range - self.num_channels = num_channels - - -class FakeQuantWithMinMaxObserver(UniformQuantObserver): - r""" - Quantization aware operation which provides the fake quantization observer function on data with min and max. - - The detail of the quantization mode `DEFAULT` is described as below: - - The running min/max :math:`x_{min}` and :math:`x_{max}` are computed as: - - .. math:: - - \begin{array}{ll} \\ - x_{min} = - \begin{cases} - \min(\min(X), 0) - & \text{ if } ema = \text{False} \\ - \min((1 - c) \min(X) + \text{c } x_{min}, 0) - & \text{ if } \text{otherwise} - \end{cases}\\ - x_{max} = - \begin{cases} - \max(\max(X), 0) - & \text{ if } ema = \text{False} \\ - \max((1 - c) \max(X) + \text{c } x_{max}, 0) - & \text{ if } \text{otherwise} - \end{cases} - \end{array} - - where X is the input tensor, and :math:`c` is the `ema_decay`. - - The scale and zero point zp is computed as: - - .. math:: - - \begin{array}{ll} \\ - scale = - \begin{cases} - \frac{x_{max} - x_{min}}{Q_{max} - Q_{min}} - & \text{ if } symmetric = \text{False} \\ - \frac{2\max(x_{max}, \left | x_{min} \right |) }{Q_{max} - Q_{min}} - & \text{ if } \text{otherwise} - \end{cases}\\ - zp\_min = Q_{min} - \frac{x_{min}}{scale} \\ - zp = \left \lfloor \min(Q_{max}, \max(Q_{min}, zp\_min)) + 0.5 \right \rfloor - \end{array} - - where :math:`Q_{max}` and :math:`Q_{min}` is decided by quant_dtype, for example, if quant_dtype=INT8, - then :math:`Q_{max} = 127` and :math:`Q_{min} = -128`. - - The fake quant output is computed as: - - .. math:: - - \begin{array}{ll} \\ - u_{min} = (Q_{min} - zp) * scale \\ - u_{max} = (Q_{max} - zp) * scale \\ - u_X = \left \lfloor \frac{\min(u_{max}, \max(u_{min}, X)) - u_{min}}{scale} - + 0.5 \right \rfloor \\ - output = u_X * scale + u_{min} - \end{array} - - The detail of the quantization mode `LEARNED_SCALE` is described as below: - - The fake quant output is computed as: - - .. math:: - - \bar{X}=\left\{\begin{matrix} - clip\left ( \frac{X}{maxq},0,1\right ) \qquad \quad if\quad neg\_trunc\\ - clip\left ( \frac{X}{maxq},-1,1\right )\qquad \ if\quad otherwise - \end{matrix}\right. \\ - - output=\frac{floor\left ( \bar{X}\ast Q_{max}+0.5 \right ) \ast scale }{Q_{max}} - - where X is the input tensor. - where :math:`Q_{max}` (quant_max) is decided by quant_dtype and neg_trunc, for example, if quant_dtype=INT8 - and neg_trunc works, :math:`Q_{max} = 256` , otherwise :math:`Q_{max} = 127`. - - The maxq is updated by training, and its gradient is calculated as follows: - - .. 
math:: - - \frac{\partial \ output}{\partial \ maxq} = \left\{\begin{matrix} - -\frac{X}{maxq}+\left \lfloor \frac{X}{maxq} \right \rceil \qquad if\quad bound_{lower}< \frac{X}{maxq}< 1\\ - -1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\le bound_{lower}\\ - 1 \qquad \quad \qquad \quad if\quad \frac{X}{maxq}\ge 1 \qquad \quad - \end{matrix}\right. \\ - - bound_{lower}= - \left\{\begin{matrix} - 0\qquad \quad if\quad neg\_trunc\\ - -1\qquad if\quad otherwise - \end{matrix}\right. - - Then minq is computed as: - - .. math:: - - minq=\left\{\begin{matrix} - 0 \qquad \qquad \quad if\quad neg\_trunc\\ - -maxq\qquad if\quad otherwise - \end{matrix}\right. - - When exporting, the scale and zero point zp is computed as: - - .. math:: - - scale=\frac{maxq}{quant\_max} ,\quad zp=0 \\ - - zp is equal to 0 consistently, due to the LEARNED_SCALE`s symmetric nature. - - Args: - min_init (int, float, list): The initialized min value. Default: -6. - max_init (int, float, list): The initialized max value. Default: 6. - ema (bool): The exponential Moving Average algorithm updates min and max. Default: False. - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - per_channel (bool): Quantization granularity based on layer or on channel. Default: False. - channel_axis (int): Quantization by channel axis. Default: 1. - num_channels (int): declarate the min and max channel size, Default: 1. - quant_dtype (QuantDtype): The datatype of quantization, supporting 4 and 8bits. Default: QuantDtype.INT8. - symmetric (bool): Whether the quantization algorithm is symmetric or not. Default: False. - narrow_range (bool): Whether the quantization algorithm uses narrow range or not. Default: False. - quant_delay (int): Quantization delay parameters according to the global step. Default: 0. - neg_trunc (bool): Whether the quantization algorithm uses negative truncation or not. Default: False. - mode (str): Optional quantization mode, currently only `DEFAULT`(QAT) and `LEARNED_SCALE` are supported. - Default: ("DEFAULT") - Inputs: - - **x** (Tensor) - The input of FakeQuantWithMinMaxObserver. The input dimension is preferably 2D or 4D. - - Outputs: - Tensor, with the same type and shape as the `x`. - - Raises: - TypeError: If `min_init` or `max_init` is not int, float or list. - TypeError: If `quant_delay` is not an int. - ValueError: If `quant_delay` is less than 0. - ValueError: If `min_init` is not less than `max_init`. - ValueError: If `mode` is neither `DEFAULT` nor `LEARNED_SCALE`. - ValueError: If `mode` is `LEARNED_SCALE` and `symmetric` is not `True`. - ValueError: If `mode` is `LEARNED_SCALE`, and `narrow_range` is not `True` unless when `neg_trunc` is `True`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore import Tensor - >>> fake_quant = nn.FakeQuantWithMinMaxObserver() - >>> x = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> result = fake_quant(x) - >>> print(result) - [[ 0.9882355 1.9764705 0.9882355] - [-1.9764705 0. 
-0.9882355]] - """ - - def __init__(self, - min_init=-6, - max_init=6, - ema=False, - ema_decay=0.999, - per_channel=False, - channel_axis=1, - num_channels=1, - quant_dtype=QuantDtype.INT8, - symmetric=False, - narrow_range=False, - quant_delay=0, - neg_trunc=False, - mode="DEFAULT"): - """Initialize FakeQuantWithMinMaxObserver""" - super(FakeQuantWithMinMaxObserver, self).__init__(quant_dtype=quant_dtype, per_channel=per_channel, - symmetric=symmetric, narrow_range=narrow_range, - num_channels=num_channels) - Validator.check_value_type("min_init", min_init, [int, float, list], type(self).__name__) - Validator.check_value_type("max_init", max_init, [int, float, list], type(self).__name__) - Validator.check_non_negative_int(quant_delay, 'quant_delay', self.cls_name) - self.min_init = min_init - self.max_init = max_init - self.quant_dtype = quant_dtype - self.num_bits = quant_dtype.num_bits - self.ema = ema - self.ema_decay = ema_decay - self.per_channel = per_channel - self.num_channels = num_channels - self.channel_axis = channel_axis - self.quant_delay = quant_delay - self.symmetric = symmetric - self.narrow_range = narrow_range - self.neg_trunc = neg_trunc - self.mode = mode - self.is_ascend = context.get_context('device_target') == "Ascend" - self.Neg = P.Neg() - - min_array = self._get_init_array(self.min_init) - max_array = self._get_init_array(self.max_init) - if not np.greater(max_array, min_array).all(): - raise ValueError(f"For '{self.cls_name}', the 'max_init' must be greater than 'min_init', " - f"but got 'max_init': {max_init}, 'min_init': {min_init}.") - if self.mode == "DEFAULT": - self._default_init(min_array, max_array) - elif self.mode == "LEARNED_SCALE": - self._learned_scale_init(min_array, max_array) - else: - raise ValueError(f"For '{self.cls_name}', only `DEFAULT` and `LEARNED_SCALE` mode are valid, but got " - f"'mode': {self.mode}.") - - def reset(self, quant_dtype=QuantDtype.INT8, min_init=-6, max_init=6): - r""" - Reset the quant max parameter (eg. 256) and the initial value of the minq parameter and maxq parameter, - this function is currently only valid for `LEARNED_SCALE` mode. - - Args: - quant_dtype (QuantDtype): The datatype of quantization, supporting 4 and 8bits. Default: QuantDtype.INT8. - min_init (int, float, list): The initialized min value. Default: -6. - max_init (int, float, list): The initialized max value. Default: 6. - """ - if self.mode == "LEARNED_SCALE": - self.quant_dtype = quant_dtype - self.num_bits = quant_dtype.num_bits - self._calculate_quant_max() - if self.neg_trunc: - min_init = 0 - - self.min_init = min_init - self.max_init = max_init - min_array = self._get_init_array(self.min_init) - max_array = self._get_init_array(self.max_init) - if not np.greater(max_array, min_array).all(): - raise ValueError(f"For '{self.cls_name}', the 'max_init' must be greater than 'min_init', " - f"but got 'max_init': {max_init}, 'min_init': {min_init}.") - - self.minq.set_data(Tensor(min_array)) - self.maxq.set_data(Tensor(max_array)) - self.quant_max.set_data(Tensor(np.array([self._quant_max]).astype(np.float32))) - else: - raise ValueError(f"For '{self.cls_name}', only `LEARNED_SCALE` mode is valid, but got 'mode': {self.mode}.") - - def _default_init(self, min_array, max_array): - """ - Initialization of `DEFAULT`(QAT) mode. 
- """ - # init tensor min and max for fake quantized operation - self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False) - self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False) - - # init fake quant relative op - if self.per_channel: - quant_fun = partial(Q.FakeQuantPerChannel, channel_axis=self.channel_axis) - ema_fun = partial(Q.MinMaxUpdatePerChannel, channel_axis=self.channel_axis) - else: - quant_fun = Q.FakeQuantPerLayer - ema_fun = Q.MinMaxUpdatePerLayer - - self.ema_update = ema_fun(ema=self.ema, ema_decay=self.ema_decay) - if self.is_ascend: - self.fake_quant_train = quant_fun(num_bits=self.quant_dtype.num_bits, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - quant_delay=self.quant_delay) - self.fake_quant_infer = self.fake_quant_train - else: - quant_fun = partial(quant_fun, - ema=self.ema, - ema_decay=self.ema_decay, - num_bits=self.quant_dtype.num_bits, - symmetric=self.symmetric, - narrow_range=self.narrow_range, - quant_delay=self.quant_delay) - self.fake_quant_train = quant_fun(training=True) - self.fake_quant_infer = quant_fun(training=False) - - def _learned_scale_init(self, min_array, max_array): - """ - Initialization of `LEARNED_SCALE` mode. - """ - if not self.symmetric: - raise ValueError(f"For '{self.cls_name}', the 'LEARNED_SCALE' mode only support 'symmetric' quant, " - f"but got 'symmetric': {self.symmetric}. Please set 'symmetric' to True.") - if self.neg_trunc: - min_array = self._get_init_array(0) - if self.narrow_range: - raise ValueError(f"For '{self.cls_name}', the 'LEARNED_SCALE' mode only support the combination of " - f"'neg_trunc=True and narrow_range=False' config scenario, but got 'narrow_range': " - f"{self.narrow_range}.") - elif not self.narrow_range: - raise ValueError(f"For '{self.cls_name}', the 'LEARNED_SCALE' mode only support 'narrow_range=True' " - f"config, except for 'neg_trunc=True' scenario. But got 'narrow_range': " - f"{self.narrow_range}.") - - self._calculate_quant_max() - - self.minq = Parameter(Tensor(min_array), name='minq') - self.maxq = Parameter(Tensor(max_array), name='maxq') - self.quant_max = Parameter(Tensor(np.array([self._quant_max]).astype(np.float32)), - name="quant_max", requires_grad=False) - - # init fake quant relative op - if self.per_channel: - quant_fun = partial(Q.FakeLearnedScaleQuantPerChannel, channel_axis=self.channel_axis) - else: - quant_fun = Q.FakeLearnedScaleQuantPerLayer - - quant_fun = partial(quant_fun, - quant_delay=self.quant_delay, - neg_trunc=self.neg_trunc) - self.fake_quant_train = quant_fun(training=True) - self.fake_quant_infer = quant_fun(training=False) - - def _get_init_array(self, init_date): - """ - Convert the initial value to array. 
- """ - if isinstance(init_date, list) and self.per_channel and len(init_date) != self.num_channels: - raise ValueError(f"For '{self.cls_name}', the length of 'min_init/max_init' list must be equal to " - f"'num_channels' for perchannel quant scenario, but got 'min_init/max_init': {init_date} " - f"and num_channels: {self.num_channels}.") - if isinstance(init_date, list) and not self.per_channel and len(init_date) != 1: - raise ValueError(f"For '{self.cls_name}', the length of the 'min_init/max_init' list must be 1 for " - f"perlayer quant scenario, but got {len(init_date)}.") - - if isinstance(init_date, list): - min_max_array = np.array(init_date).astype(np.float32) - elif self.per_channel and not isinstance(init_date, list): - min_max_array = np.array([init_date] * self.num_channels).astype(np.float32) - else: - min_max_array = np.array([init_date]).astype(np.float32) - return min_max_array - - def _calculate_quant_max(self): - """ - The quantization range is calculated according to num_bits. - """ - if not self.neg_trunc: - self._quant_max = (1 << (self.num_bits - 1)) - 1 - else: - self._quant_max = (1 << self.num_bits) - 1 - - def extend_repr(self): - """Display instance object as string.""" - s = 'quant_dtype={}, symmetric={}, narrow_range={}, ema={}({}), per_channel={}({}, {}), ' \ - 'quant_delay={}, min_init={}, max_init={}'.format(self.quant_dtype, self.symmetric, self.narrow_range, - self.ema, self.ema_decay, self.per_channel, - self.channel_axis, self.num_channels, self.quant_delay, - self.min_init, self.max_init) - return s - - def construct(self, x): - if self.mode == "LEARNED_SCALE": - if self.training: - out = self.fake_quant_train(x, self.maxq, self.quant_max) - if not self.neg_trunc: - self.minq = self.Neg(self.maxq) - else: - out = self.fake_quant_infer(x, self.maxq, self.quant_max) - else: - if self.training: - min_up, max_up = self.ema_update(x, self.minq, self.maxq) - self.minq = min_up - self.maxq = max_up - out = self.fake_quant_train(x, self.minq, self.maxq) - else: - out = self.fake_quant_infer(x, self.minq, self.maxq) - return out - - -QuantConfig = namedtuple("QuantConfig", ['weight', 'activation']) - -quant_config_default = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - activation=FakeQuantWithMinMaxObserver.partial_init()) - - -class Conv2dBnFoldQuantOneConv(Cell): - r""" - 2D convolution which use the convolution layer statistics once to calculate Batch Normalization - operation folded construct. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`FakeQuantWithMinMaxObserver`. - - .. math:: - w_{q}=quant(\frac{w}{\sqrt{var_{G}+\epsilon}}*\gamma ) - - b=\frac{-\mu _{G} }{\sqrt{var_{G}+\epsilon }}*\gamma +\beta - - y=w_{q}\times x+b - - where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of - subclass of `FakeQuantWithMinMaxObserver`, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - `mu _{G}` and `var_{G}` represent the global mean and variance respectively. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. 
The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - eps (float): Parameters for Batch Normalization. Default: 1e-5. - momentum (float): Parameters for Batch Normalization op. Default: 0.997. - has_bias (bool): Specifies whether the layer uses a bias vector, which is temporarily invalid. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - convolution kernel. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - bias vector. Default: 'zeros'. - beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - beta vector. Default: 'zeros'. - gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - gamma vector. Default: 'ones'. - mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - mean vector. Default: 'zeros'. - var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - variance vector. Default: 'ones'. - fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. - TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` or `fake` is not a bool. - TypeError: If `data_format` is not a string. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_bnfold = nn.Conv2dBnFoldQuantOneConv(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... 
weight_init="ones", quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_bnfold(x) - >>> print(result) - [[[[5.9296875 13.8359375] - [11.859375 17.78125]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - eps=1e-5, - momentum=0.997, - has_bias=False, - weight_init='normal', - bias_init='zeros', - beta_init='zeros', - gamma_init='ones', - mean_init='zeros', - var_init='ones', - fake=True, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize Conv2dBnFoldQuant layer""" - super(Conv2dBnFoldQuantOneConv, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values " - f"in ('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), but got " - f"{type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - self.eps = eps - self.momentum = 1 - momentum - self.has_bias = has_bias - self.fake = Validator.check_bool(fake, "fake", self.cls_name) - self.quant_config = quant_config - data_format = 'NCHW' - self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name) - self._target = context.get_context("device_target") - self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE - self.is_ge_backend = False - if context.get_context("enable_ge"): - self.is_ge_backend = True - self.enable_default_train = self.is_graph_mode and \ - (self.is_ge_backend or self._target == "Ascend") - - # initialize convolution op and Parameter - self.conv = P.Conv2D(out_channel=out_channels, - kernel_size=self.kernel_size, - pad_mode=pad_mode, - pad=padding, - stride=self.stride, - dilation=self.dilation, - group=group) - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - channel_axis = 0 - self.channel_axis = channel_axis - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - self.bias_add = P.BiasAdd() - self.bias = None - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - - # initialize BatchNorm Parameter - self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma') - self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta') - self.moving_mean = 
Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False) - self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance', - requires_grad=False) - - # initialize fake ops - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - self.freeze_bn = False - self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps, - momentum=self.momentum, data_format=self.format) - - self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format) - self.sub_mean = P.Sub() - self.sub_var = P.Sub() - self.mul_mean = P.Mul() - self.mul_var = P.Mul() - self.assign_sub_mean = P.AssignSub() - self.assign_sub_var = P.AssignSub() - self.reshape = P.Reshape() - - @classmethod - def from_float(cls, convbn: Conv2dBnAct, quant_config: QuantConfig): - """ - A class method to create `Conv2dBnFoldQuantOneConv` from a `Conv2dBnAct` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> conv_bn_op = nn.Conv2dBnAct(ic, oc, kernel_size) - >>> # when apply QAT on `conv_bn_op`, QAT need to create a quant Conv2dBnAct whose weight is fake-quanted, - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> conv_bn_quant = nn.Conv2dBnFoldQuantOneConv.from_float(conv_bn_op, quant_config) - """ - - kwargs = {'in_channels': convbn.conv.in_channels, - 'out_channels': convbn.conv.out_channels, - 'kernel_size': convbn.conv.kernel_size, - 'stride': convbn.conv.stride, - 'pad_mode': convbn.conv.pad_mode, - 'padding': convbn.conv.padding, - 'dilation': convbn.conv.dilation, - 'group': convbn.conv.group, - 'has_bias': convbn.conv.has_bias, - 'bias_init': convbn.conv.bias_init, - 'weight_init': convbn.conv.weight_init, - 'quant_config': quant_config, - 'fake': True, - } - if hasattr(convbn, 'batchnorm'): - kwargs['eps'] = convbn.batchnorm.eps - kwargs['momentum'] = convbn.batchnorm.momentum - kwargs['beta_init'] = convbn.batchnorm.beta_init - kwargs['gamma_init'] = convbn.batchnorm.gamma_init - kwargs['mean_init'] = convbn.batchnorm.moving_mean_init - kwargs['var_init'] = convbn.batchnorm.moving_var_init - return cls(**kwargs) - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, momentum={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, - self.pad_mode, self.padding, self.dilation, self.group, self.fake, - self.momentum) - return s - - def construct(self, x): - running_std = P.Sqrt()(P.Add()(self.moving_variance, self.eps)) - scale_factor = self.gamma / running_std - if self.channel_axis: - scale_factor = self.reshape(scale_factor, (1, -1, 1, 1)) - else: - scale_factor = self.reshape(scale_factor, (-1, 1, 1, 1)) - weight = self.weight * scale_factor - if self.fake: - weight = self.fake_quant_weight(weight) - conv = self.conv(x, weight) - - if self.freeze_bn: - return conv + self.reshape((self.beta - self.gamma * self.moving_mean / running_std), (1, -1, 1, 1)) - scale_factor = self.reshape(scale_factor, (1, -1, 1, 1)) - if self.enable_default_train: - scale_factor = P.Reciprocal()(scale_factor) - conv_orig = conv * scale_factor - else: - conv_orig = conv / scale_factor - if self.training: - return self.bn_train(conv_orig, - self.gamma, - self.beta, - self.moving_mean, - 
self.moving_variance)[0] - - return self.bn_infer(conv_orig, - self.gamma, - self.beta, - self.moving_mean, - self.moving_variance)[0] - - -class Conv2dBnFoldQuant(Cell): - r""" - 2D convolution with Batch Normalization operation folded construct. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`FakeQuantWithMinMaxObserver`. - - .. math:: - y = x\times w+ b - - w_{q}=quant(\frac{w}{\sqrt{Var[y]+\epsilon}}*\gamma ) - - y_{out}= w_{q}\times x+\frac{b-E[y]}{\sqrt{Var[y]+\epsilon}}*\gamma +\beta - - where :math:`quant` is the continuous execution of quant and dequant. Two convolution - and Batch Normalization operation are used here, the purpose of the first convolution and Batch Normalization - is to count the mean `E[y]` and variance `Var[y]` of current batch output for quantization. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - eps (float): Parameters for Batch Normalization. Default: 1e-5. - momentum (float): Parameters for Batch Normalization op. Default: 0.997. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - convolution kernel. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - bias vector. Default: 'zeros'. - beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - beta vector. Default: 'zeros'. - gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - gamma vector. Default: 'ones'. - mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - mean vector. Default: 'zeros'. - var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the - variance vector. Default: 'ones'. - fake (bool): Whether Conv2dBnFoldQuant Cell adds FakeQuantWithMinMaxObserver. Default: True. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - freeze_bn (int): The quantization freeze Batch Normalization op is according to the global step. - Default: 100000. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. 
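Before moving on to the two-pass Conv2dBnFoldQuant variant, note that the core of `Conv2dBnFoldQuantOneConv.construct` above is a per-output-channel rescale of the weight. A numpy sketch with an invented helper name:

import numpy as np

def fold_weight(weight, gamma, moving_var, eps=1e-5):
    """Per-output-channel BN fold of a conv weight of shape (C_out, C_in, kH, kW)."""
    running_std = np.sqrt(moving_var + eps)          # shape (C_out,)
    scale = gamma / running_std
    return weight * scale.reshape(-1, 1, 1, 1), scale

# The folded weight is what gets fake-quantized and convolved; the conv output is then divided by the
# same per-channel scale (or multiplied by its reciprocal on Ascend/GE) so that BatchNorm still sees
# the unfolded activation statistics during training.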
- - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. - TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` or `fake` is not a bool. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_bnfold = nn.Conv2dBnFoldQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... weight_init="ones", quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_bnfold(x) - >>> print(result) - [[[[5.9296875 13.8359375] - [11.859375 17.78125]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - eps=1e-5, - momentum=0.997, - has_bias=False, - weight_init='normal', - bias_init='zeros', - beta_init='zeros', - gamma_init='ones', - mean_init='zeros', - var_init='ones', - fake=True, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8, - freeze_bn=100000): - """Initialize Conv2dBnFoldQuant layer""" - super(Conv2dBnFoldQuant, self).__init__() - if context.get_context('device_target') == "CPU": - raise ValueError(f"For '{self.cls_name}', only the 'Ascend' and 'GPU' platforms" - f" are supported, but got {context.get_context('device_target')}.") - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values in " - f"('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), " - f"but got {type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - self.eps = eps - self.momentum = momentum - self.has_bias = has_bias - self.freeze_bn = freeze_bn - self.fake = Validator.check_bool(fake, "fake", self.cls_name) - self.quant_config = quant_config - self.quant_dtype = quant_dtype - self.is_gpu = context.get_context('device_target') == "GPU" - - # initialize convolution op and Parameter - self.conv = P.Conv2D(out_channel=out_channels, - kernel_size=self.kernel_size, - pad_mode=pad_mode, - pad=padding, - stride=self.stride, - 
dilation=self.dilation, - group=group) - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - channel_axis = 0 - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - self.bias_add = P.BiasAdd() - self.bias = None - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - - # initialize BatchNorm Parameter - self.gamma = Parameter(initializer(gamma_init, [out_channels]), name='gamma') - self.beta = Parameter(initializer(beta_init, [out_channels]), name='beta') - self.moving_mean = Parameter(initializer(mean_init, [out_channels]), name='moving_mean', requires_grad=False) - self.moving_variance = Parameter(initializer(var_init, [out_channels]), name='moving_variance', - requires_grad=False) - - # initialize fake ops - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn) - self.correct_mul = Q.CorrectionMul(channel_axis) - if context.get_context('device_target') == "Ascend": - self.batchnorm_fold2_train = Q.BatchNormFold2D(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = Q.BatchNormFold2D(freeze_bn=0) - elif context.get_context('device_target') == "GPU": - self.batchnorm_fold2_train = Q.BatchNormFold2(freeze_bn=freeze_bn) - self.batchnorm_fold2_infer = Q.BatchNormFold2(freeze_bn=0) - self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False) - self.one = Tensor(1, mstype.int32) - self.assignadd = P.AssignAdd() - - @classmethod - def from_float(cls, convbn: Conv2dBnAct, quant_config: QuantConfig, extra_args: dict): - """ - A class method to create `Conv2dBnFoldQuantOneConv` from a `Conv2dBnAct` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> conv_bn_op = nn.Conv2dBnAct(ic, oc, kernel_size) - >>> # when apply QAT on `conv_bn_op`, QAT need to create a quant Conv2dBnAct whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... 
activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> extra_args = {"freeze_bn": 100000} - >>> conv_bn_quant = nn.Conv2dBnFoldQuant.from_float(conv_bn_op, quant_config, extra_args) - """ - - kwargs = {'in_channels': convbn.conv.in_channels, - 'out_channels': convbn.conv.out_channels, - 'kernel_size': convbn.conv.kernel_size, - 'stride': convbn.conv.stride, - 'pad_mode': convbn.conv.pad_mode, - 'padding': convbn.conv.padding, - 'dilation': convbn.conv.dilation, - 'group': convbn.conv.group, - 'has_bias': convbn.conv.has_bias, - 'bias_init': convbn.conv.bias_init, - 'weight_init': convbn.conv.weight_init, - 'quant_config': quant_config, - 'fake': True, - } - if hasattr(convbn, 'batchnorm'): - kwargs['eps'] = convbn.batchnorm.eps - kwargs['momentum'] = convbn.batchnorm.momentum - kwargs['beta_init'] = convbn.batchnorm.beta_init - kwargs['gamma_init'] = convbn.batchnorm.gamma_init - kwargs['mean_init'] = convbn.batchnorm.moving_mean_init - kwargs['var_init'] = convbn.batchnorm.moving_var_init - kwargs = {**kwargs, **extra_args} - return cls(**kwargs) - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'fake={}, freeze_bn={}, momentum={}'.format(self.in_channels, self.out_channels, self.kernel_size, - self.stride, self.pad_mode, self.padding, self.dilation, - self.group, self.fake, self.freeze_bn, self.momentum) - return s - - def construct(self, x): - out_conv = self.conv(x, self.weight) - if self.has_bias: - out_conv = self.bias_add(out_conv, self.bias) - # BN fold1 - batch_mean, batch_std, running_mean, running_std = self.batchnorm_fold(out_conv, - self.moving_mean, - self.moving_variance, - self.step) - # fake weight - weight = self.correct_mul(self.weight, self.gamma, running_std) - if self.fake: - weight = self.fake_quant_weight(weight) - out = self.conv(x, weight) - if self.has_bias: - out = self.bias_add(out, self.bias) - # BN fold2 - if self.is_gpu: - if self.training: - out = self.batchnorm_fold2_train(out, self.beta, self.gamma, - batch_std, batch_mean, running_std, running_mean, self.step) - self.assignadd(self.step, self.one) - else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, - batch_std, batch_mean, running_std, running_mean, self.step) - else: - if self.training: - out = self.batchnorm_fold2_train(out, self.beta, self.gamma, batch_std, batch_mean, running_std) - self.assignadd(self.step, self.one) - else: - out = self.batchnorm_fold2_infer(out, self.beta, self.gamma, running_std, running_mean, running_std) - return out - - -class Conv2dBnWithoutFoldQuant(Cell): - r""" - 2D convolution and batchnorm without fold with fake quantized construct. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - .. math:: - y =x\times quant(w)+ b - - y_{bn} =\frac{y-E[y] }{\sqrt{Var[y]+ \epsilon } } *\gamma + \beta - - where :math:`quant` is the continuous execution of quant and dequant, you can refer to the implementation of - class of `FakeQuantWithMinMaxObserver`, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. 
- kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_ channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - eps (float): Parameters for Batch Normalization. Default: 1e-5. - momentum (float): Parameters for Batch Normalization op. Default: 0.997. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. - Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. - TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` is not a bool. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_no_bnfold = nn.Conv2dBnWithoutFoldQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... 
weight_init='ones', quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_no_bnfold(x) - >>> print(result) - [[[[5.929658 13.835868] - [11.859316 17.78116]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - eps=1e-5, - momentum=0.997, - weight_init='normal', - bias_init='zeros', - quant_config=quant_config_default): - """Initialize Conv2dBnWithoutFoldQuant.""" - super(Conv2dBnWithoutFoldQuant, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.has_bias = has_bias - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values in " - f"('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), " - f"but got {type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - self.bias_add = P.BiasAdd() - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - else: - self.bias = None - # initialize convolution op and Parameter - self.conv = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group) - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - channel_axis = 0 - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - self.batchnorm = BatchNorm2d(out_channels, eps=eps, momentum=momentum) - - @classmethod - def from_float(cls, convbn: Conv2dBnAct, quant_config: QuantConfig): - """ - A class method to create `Conv2dBnFoldQuantOneConv` from a `Conv2dBnAct` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> conv_bn_op = nn.Conv2dBnAct(ic, oc, kernel_size) - >>> # when apply QAT on `conv_bn_op`, QAT need to create a quant Conv2dBnAct whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... 
activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> conv_bn_quant = nn.Conv2dBnFoldQuant.from_float(conv_bn_op, quant_config) - """ - - kwargs = {'in_channels': convbn.conv.in_channels, - 'out_channels': convbn.conv.out_channels, - 'kernel_size': convbn.conv.kernel_size, - 'stride': convbn.conv.stride, - 'pad_mode': convbn.conv.pad_mode, - 'padding': convbn.conv.padding, - 'dilation': convbn.conv.dilation, - 'group': convbn.conv.group, - 'has_bias': convbn.conv.has_bias, - 'bias_init': convbn.conv.bias_init, - 'weight_init': convbn.conv.weight_init, - 'quant_config': quant_config, - } - if hasattr(convbn, 'batchnorm'): - kwargs['eps'] = convbn.batchnorm.eps - kwargs['momentum'] = convbn.batchnorm.momentum - kwargs = {**kwargs} - return cls(**kwargs) - - def construct(self, x): - weight = self.fake_quant_weight(self.weight) - out = self.conv(x, weight) - if self.has_bias: - out = self.bias_add(out, self.bias) - out = self.batchnorm(out) - return out - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'has_bias={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, - self.padding, self.dilation, self.group, self.has_bias) - return s - - -class Conv2dQuant(Cell): - r""" - 2D convolution with fake quantized operation layer. - - This part is a more detailed overview of Conv2d operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - in_channels (int): The number of input channel :math:`C_{in}`. - out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution window. - stride (Union[int, tuple[int]]): Specifies stride for all spatial dimensions with the same value. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". - padding (Union[int, tuple[int]]): Implicit paddings on both sides of the `x`. Default: 0. - dilation (Union[int, tuple[int]]): Specifies the dilation rate to use for dilated convolution. Default: 1. - group (int): Splits filter into groups, `in_ channels` and `out_channels` must be - divisible by the number of groups. Default: 1. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. - Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Default: 'zeros'. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - The input dimension is preferably 2D or 4D. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Raises: - TypeError: If `in_channels`, `out_channels` or `group` is not an int. 
- TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple. - TypeError: If `has_bias` is not a bool. - ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1. - ValueError: If `padding` is less than 0. - ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> conv2d_quant = nn.Conv2dQuant(1, 1, kernel_size=(2, 2), stride=(1, 1), pad_mode="valid", - ... weight_init='ones', quant_config=qconfig) - >>> x = Tensor(np.array([[[[1, 0, 3], [1, 4, 7], [2, 5, 2]]]]), mindspore.float32) - >>> result = conv2d_quant(x) - >>> print(result) - [[[[5.9296875 13.8359375] - [11.859375 17.78125]]]] - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - weight_init='normal', - bias_init='zeros', - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize Conv2dQuant.""" - super(Conv2dQuant, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.has_bias = has_bias - self.kernel_size = twice(kernel_size) - self.stride = twice(stride) - self.dilation = twice(dilation) - for kernel_size_elem in self.kernel_size: - Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name) - for stride_elem in self.stride: - Validator.check_positive_int(stride_elem, 'stride item', self.cls_name) - for dilation_elem in self.dilation: - Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name) - if pad_mode not in ('valid', 'same', 'pad'): - raise ValueError(f"For '{self.cls_name}', the 'pad_mode' must be one of values " - f"in ('valid', 'same', 'pad'), but got {pad_mode}.") - self.pad_mode = pad_mode - if isinstance(padding, int): - Validator.check_non_negative_int(padding, 'padding', self.cls_name) - self.padding = padding - elif isinstance(padding, tuple): - for pad in padding: - Validator.check_non_negative_int(pad, 'padding item', self.cls_name) - self.padding = padding - else: - raise TypeError(f"For '{self.cls_name}', the type of 'padding' must be int/tuple(int), " - f"but got {type(padding).__name__}!") - self.group = Validator.check_positive_int(group, "group", self.cls_name) - - weight_shape = [out_channels, in_channels // group, *self.kernel_size] - self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') - - self.bias_add = P.BiasAdd() - if Validator.check_bool(has_bias, "has_bias", self.cls_name): - self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') - else: - self.bias = None - - self.conv = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group) - channel_axis = 0 - self.fake_quant_weight = quant_config.weight(channel_axis=channel_axis, - num_channels=out_channels) - - @classmethod - def from_float(cls, conv: Conv2d, quant_config: QuantConfig): - """ - A class method to create `Conv2dQuant` from a `Conv2d` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> kernel_size = 3 - >>> 
conv_op = nn.Conv2d(ic, oc, kernel_size) - >>> # when apply QAT on `conv_op`, QAT need to create a quant conv2d whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> conv_quant = nn.Conv2dQuant.from_float(conv_op, quant_config) - """ - conv_quant = cls( - conv.in_channels, - conv.out_channels, - kernel_size=conv.kernel_size, - stride=conv.stride, - pad_mode=conv.pad_mode, - padding=conv.padding, - dilation=conv.dilation, - group=conv.group, - has_bias=conv.has_bias, - bias_init=conv.bias_init, - weight_init=conv.weight_init, - quant_config=quant_config) - return conv_quant - - def construct(self, x): - weight = self.fake_quant_weight(self.weight) - out = self.conv(x, weight) - if self.has_bias: - return self.bias_add(out, self.bias) - return out - - def extend_repr(self): - """Display instance object as string.""" - s = 'in_channels={}, out_channels={}, kernel_size={}, stride={}, ' \ - 'pad_mode={}, padding={}, dilation={}, group={}, ' \ - 'has_bias={}'.format(self.in_channels, self.out_channels, self.kernel_size, self.stride, self.pad_mode, - self.padding, self.dilation, self.group, self.has_bias) - return s - - -class DenseQuant(Cell): - r""" - The fully connected layer with fake quantized operation. - - This part is a more detailed overview of Dense operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - in_channels (int): The dimension of the input space. - out_channels (int): The dimension of the output space. - weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype - is same as `x`. The values of str refer to the function `initializer`. Default: 'normal'. - bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is - same as `x`. The values of str refer to the function `initializer`. Default: 'zeros'. - has_bias (bool): Specifies whether the layer uses a bias vector. Default: True. - activation (Union[str, Cell, Primitive]): The regularization function applied to the output of the layer, - eg. 'relu'. Default: None. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - The input dimension is preferably 2D or 4D. - - Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. - - Raises: - TypeError: If `in_channels`, `out_channels` is not an int. - TypeError: If `has_bias` is not a bool. - TypeError: If `activation` is not str, Cell and Primitive. - ValueError: If `in_channels` or `out_channels` is less than 1. - ValueError: If the dims of `weight_init` is not equal to 2 or the first element of `weight_init` is not equal - to `out_channels` or the second element of `weight_init` is not equal to `in_channels`. 
- ValueError: If the dims of `bias_init` is not equal to 1 or the element of `bias_init` is not equal - to `out_channels`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> dense_quant = nn.DenseQuant(2, 1, weight_init='ones', quant_config=qconfig) - >>> x = Tensor(np.array([[1, 5], [3, 4]]), mindspore.float32) - >>> result = dense_quant(x) - >>> print(result) - [[5.929413] - [6.9176483]] - """ - - def __init__(self, - in_channels, - out_channels, - weight_init='normal', - bias_init='zeros', - has_bias=True, - activation=None, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize DenseQuant.""" - super(DenseQuant, self).__init__() - self.in_channels = Validator.check_positive_int(in_channels, "in_channels", self.cls_name) - self.out_channels = Validator.check_positive_int(out_channels, "out_channels", self.cls_name) - self.has_bias = Validator.check_bool(has_bias, "has_bias", self.cls_name) - - if isinstance(weight_init, Tensor): - if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \ - weight_init.shape[1] != in_channels: - raise ValueError(f"For '{self.cls_name}', weight init shape error. The ndim of 'weight_init' should " - f"be equal to 2, and the first dim must be equal to 'out_channels', and the " - f"second dim must be equal to 'in_channels'. But got 'weight_init': {weight_init}, " - f"'out_channels': {out_channels}, 'in_channels': {in_channels}.") - - self.weight = Parameter(initializer( - weight_init, [out_channels, in_channels]), name="weight") - - if self.has_bias: - if isinstance(bias_init, Tensor): - if bias_init.ndim != 1 or bias_init.shape[0] != out_channels: - raise ValueError(f"For '{self.cls_name}', bias init shape error. The ndim of 'bias_init' should " - f"be equal to 1, and the first dim must be equal to 'out_channels'. But got " - f"'bias_init': {bias_init}, 'out_channels': {out_channels}.") - - self.bias = Parameter(initializer( - bias_init, [out_channels]), name="bias") - - self.matmul = P.MatMul(transpose_b=True) - self.bias_add = P.BiasAdd() - - self.activation = get_activation(activation) if isinstance(activation, str) else activation - if activation is not None and not isinstance(self.activation, (Cell, Primitive)): - raise TypeError(f"For '{self.cls_name}', the 'activation' must be str or Cell or Primitive, " - f"but got {activation}.") - - self.activation_flag = self.activation is not None - self.fake_quant_weight = quant_config.weight(channel_axis=0, - num_channels=out_channels) - - @classmethod - def from_float(cls, dense: Dense, quant_config: QuantConfig): - """ - A class method to create `DenseQuant` from a `Dense` - - Examples: - >>> from mindspore import nn - >>> ic = 10 - >>> oc = 100 - >>> dense_op = nn.Dense(ic, oc) - >>> # when apply QAT on `dense_op`, QAT need to create a quant dense whose weight is fake-quanted - >>> quant_config: QuantConfig = QuantConfig(weight=FakeQuantWithMinMaxObserver.partial_init(), - ... activation=FakeQuantWithMinMaxObserver.partial_init()) - >>> dense_quant = nn.DenseQuant.from_float(dense_op, quant_config) - """ - dense_quant = cls( - dense.in_channels, - dense.out_channels, - dense.weight, - dense.bias, - dense.has_bias, - dense.activation, - quant_config=quant_config) - return dense_quant - - def construct(self, x): - """Use operators to construct the Dense layer. - - Args: - x (Tensor): Input tensor. 
- """ - output = self.fake_quant_weight(self.weight) - output = self.matmul(x, output) - if self.has_bias: - output = self.bias_add(output, self.bias) - if self.activation_flag: - return self.activation(output) - return output - - def extend_repr(self): - """A pretty print for Dense layer.""" - s = 'in_channels={}, out_channels={}, weight={}, has_bias={}'.format( - self.in_channels, self.out_channels, self.weight, self.has_bias) - if self.has_bias: - s += ', bias={}'.format(self.bias) - if self.activation_flag: - s += ', activation={}'.format(self.activation) - return s - - -class _QuantActivation(Cell): - r""" - Base class for quantization aware training activation function. Adds fake quantized operation - after activation operation. - """ - - def get_origin(self): - raise NotImplementedError - - -class ActQuant(_QuantActivation): - r""" - Quantization aware training activation function. - - Add the fake quantized operation to the end of activation operation, by which the output of activation - operation will be truncated. For more details about Quantization, please refer to the implementation - of subclass of `FakeQuantWithMinMaxObserver`, :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - activation (Cell): Activation cell. - ema (bool): The exponential Moving Average algorithm updates min and max. Default: False. - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - fake_before (bool): Whether add fake quantized operation before activation. Default: False. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x** (Tensor) - The input of ActQuant. The input dimension is preferably 2D or 4D. - - Outputs: - Tensor, with the same type and shape as the `x`. - - Raises: - TypeError: If `activation` is not an instance of Cell. - TypeError: If `fake_before` is not a bool. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> act_quant = nn.ActQuant(nn.ReLU(), quant_config=qconfig) - >>> x = Tensor(np.array([[1, 2, -1], [-2, 0, -1]]), mindspore.float32) - >>> result = act_quant(x) - >>> print(result) - [[0.9882355 1.9764705 0. ] - [0. 0. 0. 
]] - """ - - def __init__(self, - activation, - ema=False, - ema_decay=0.999, - fake_before=False, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize ActQuant.""" - super(ActQuant, self).__init__() - act_class = activation.__class__ - act_list = [nn.ReLU, nn.ReLU6] - self.act = Validator.check_isinstance("activation", activation, Cell) - self.fake_before = Validator.check_bool(fake_before, "fake_before", self.cls_name) - if self.fake_before: - self.fake_quant_act_before = quant_config.activation(min_init=-6, - max_init=6, - ema=ema, - ema_decay=ema_decay, - quant_dtype=quant_dtype) - self.neg_trunc = False - self.narrow_range = False - preset_dict = quant_config.activation.p.keywords - if 'mode' in preset_dict and preset_dict['mode'] == "LEARNED_SCALE" and act_class in act_list: - self.neg_trunc = True - elif 'narrow_range' in preset_dict: - self.narrow_range = preset_dict['narrow_range'] - - self.fake_quant_act = quant_config.activation(min_init=-6, - max_init=6, - ema=ema, - ema_decay=ema_decay, - quant_dtype=quant_dtype, - neg_trunc=self.neg_trunc, - narrow_range=self.narrow_range) - - def construct(self, x): - if self.fake_before: - x = self.fake_quant_act_before(x) - x = self.act(x) - x = self.fake_quant_act(x) - return x - - def get_origin(self): - return self.act - - -class TensorAddQuant(Cell): - r""" - Adds fake quantized operation after TensorAdd operation. - - This part is a more detailed overview of TensorAdd operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x1** (Tensor) - The first tensor of TensorAddQuant. The input dimension is preferably 2D or 4D. - - **x2** (Tensor) - The second tensor of TensorAddQuant. Has the same shape with `x1`. - - Outputs: - Tensor, with the same type and shape as the `x1`. - - Raises: - TypeError: If `ema_decay` is not a float. - ValueError: If the shape of `x2` is different with `x1`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> add_quant = nn.TensorAddQuant(quant_config=qconfig) - >>> x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> x2 = Tensor(np.ones((2, 3)), mindspore.float32) - >>> output = add_quant(x1, x2) - >>> print(output) - [[ 1.9764705 3.011765 1.9764705] - [-0.9882355 0.9882355 0. 
]] - """ - - def __init__(self, - ema_decay=0.999, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize TensorAddQuant.""" - super(TensorAddQuant, self).__init__() - self.fake_quant_act = quant_config.activation(min_init=-6, - max_init=6, - ema=True, - ema_decay=ema_decay, - quant_dtype=quant_dtype) - self.add = P.Add() - - def construct(self, x1, x2): - x = self.add(x1, x2) - x = self.fake_quant_act(x) - return x - - -class MulQuant(Cell): - r""" - Adds fake quantized operation after `Mul` operation. - - This part is a more detailed overview of `Mul` operation. For more details about Quantization, - please refer to the implementation of class of `FakeQuantWithMinMaxObserver`, - :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. - - Args: - ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. - quant_config (QuantConfig): Configures the types of quant observer and quant settings of weight and - activation. Note that, QuantConfig is a special namedtuple, which is designed for quantization - and can be generated by :func:`mindspore.compression.quant.create_quant_config` method. - Default: QuantConfig with both items set to default :class:`FakeQuantWithMinMaxObserver`. - quant_dtype (QuantDtype): Specifies the FakeQuant datatype. Default: QuantDtype.INT8. - - Inputs: - - **x1** (Tensor) - The first tensor of MulQuant. The input dimension is preferably 2D or 4D. - - **x2** (Tensor) - The second tensor of MulQuant. Has the same shape with `x1`. - - Outputs: - Tensor, with the same type and shape as the `x1`. - - Raises: - TypeError: If `ema_decay` is not a float. - ValueError: If the shape of `x2` is different with `x1`. - - Supported Platforms: - ``Ascend`` ``GPU`` - - Examples: - >>> import mindspore - >>> from mindspore.compression import quant - >>> from mindspore import Tensor - >>> qconfig = quant.create_quant_config() - >>> mul_quant = nn.MulQuant(quant_config=qconfig) - >>> x1 = Tensor(np.array([[1, 2, 1], [-2, 0, -1]]), mindspore.float32) - >>> x2 = Tensor(np.ones((2, 3)) * 2, mindspore.float32) - >>> output = mul_quant(x1, x2) - >>> print(output) - [[ 1.9764705 4.0000005 1.9764705] - [-4. 0. 
-1.9764705]] - """ - - def __init__(self, - ema_decay=0.999, - quant_config=quant_config_default, - quant_dtype=QuantDtype.INT8): - """Initialize MulQuant.""" - super(MulQuant, self).__init__() - self.fake_quant_act = quant_config.activation(min_init=-6, - max_init=6, - ema=True, - ema_decay=ema_decay, - quant_dtype=quant_dtype) - self.mul = P.Mul() - - def construct(self, x1, x2): - x = self.mul(x1, x2) - x = self.fake_quant_act(x) - return x diff --git a/mindspore/python/mindspore/train/serialization.py b/mindspore/python/mindspore/train/serialization.py index 48581c1d382..59c2d286359 100644 --- a/mindspore/python/mindspore/train/serialization.py +++ b/mindspore/python/mindspore/train/serialization.py @@ -25,7 +25,6 @@ import stat import threading from threading import Thread, Lock from collections import defaultdict, OrderedDict -from functools import wraps from io import BytesIO import math @@ -52,7 +51,6 @@ from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore.common._utils import is_shape_unknown from mindspore.communication.management import get_rank, get_group_size -from mindspore.compression.export import quant_export from mindspore.experimental import MapParameter from mindspore.parallel._cell_wrapper import get_allgather_cell from mindspore.parallel._tensor import _load_tensor, _get_tensor_strategy, _get_tensor_slice_index @@ -1123,12 +1121,6 @@ def export(net, *inputs, file_name, file_format, **kwargs): kwargs (dict): Configuration options dictionary. - - quant_mode (str): If the network is a quantization aware training network, the quant_mode should - be set to "QUANT", else the quant_mode should be set to "NONQUANT". - - mean (float): The mean of input data after preprocessing, used for quantizing the first layer of network. - Default: 127.5. - - std_dev (float): The variance of input data after preprocessing, - used for quantizing the first layer of the network. Default: 127.5. - enc_key (byte): Byte-type key used for encryption. The valid length is 16, 24, or 32. - enc_mode (Union[str, function]): Specifies the encryption mode, to take effect when enc_key is set. 
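With the quantization-specific keyword arguments (`quant_mode`, `mean`, `std_dev`) dropped from the docstring above, only the generic options remain; a minimal sketch of an export call after this change, assuming a simple `nn.Dense` cell purely for illustration (not part of this patch):

    import numpy as np
    import mindspore as ms
    from mindspore import nn, Tensor

    # Any Cell works here; nn.Dense is used only as a placeholder network.
    net = nn.Dense(16, 10)
    inputs = Tensor(np.ones([1, 16]).astype(np.float32))

    # Generic kwargs such as enc_key/enc_mode are still accepted; the quant
    # kwargs removed in this hunk are no longer recognized.
    ms.export(net, inputs, file_name="dense_net", file_format="MINDIR")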
@@ -1192,7 +1184,6 @@ def export(net, *inputs, file_name, file_format, **kwargs): inputs = tuple(inputs_col) file_name = os.path.realpath(file_name) - net = _quant_export(net, *inputs, file_format=file_format, **kwargs) if 'enc_key' in kwargs.keys(): kwargs['enc_key'], kwargs['enc_mode'] = _check_key_mode_type(file_format, **kwargs) _export(net, file_name, file_format, *inputs, **kwargs) @@ -1560,62 +1551,6 @@ def _save_dataset_to_mindir(model, dataset): model.preprocessor.op[-1].offload = op['offload'] if 'offload' in op.keys() else False -def quant_mode_manage(func): - """Inherit the quant_mode in old version.""" - - @wraps(func) - def wrapper(network, *inputs, file_format, **kwargs): - if 'quant_mode' not in kwargs: - return network - quant_mode = kwargs.get('quant_mode') - if not isinstance(quant_mode, str): - raise TypeError("For 'export', the type of 'quant_mode' should be string, " - "but got {}.".format(type(quant_mode))) - if quant_mode in ('AUTO', 'MANUAL'): - kwargs['quant_mode'] = 'QUANT' - return func(network, *inputs, file_format=file_format, **kwargs) - - return wrapper - - -@quant_mode_manage -def _quant_export(network, *inputs, file_format, **kwargs): - """Exports MindSpore quantization predict model to deploy with AIR and MINDIR.""" - supported_device = ["Ascend", "GPU"] - supported_formats = ['AIR', 'MINDIR'] - quant_mode_formats = ['QUANT', 'NONQUANT'] - - quant_mode = kwargs['quant_mode'] - if quant_mode not in quant_mode_formats: - raise KeyError(f"For 'export', the argument 'quant_mode' must be one of {quant_mode_formats}, " - f"but got {quant_mode}.") - if quant_mode == 'NONQUANT': - return network - quant_net = copy.deepcopy(network) - quant_net._create_time = int(time.time() * 1e9) - - mean = 127.5 if kwargs.get('mean', None) is None else kwargs.get('mean') - std_dev = 127.5 if kwargs.get('std_dev', None) is None else kwargs.get('std_dev') - mean = Validator.check_value_type("mean", mean, (int, float)) - std_dev = Validator.check_value_type("std_dev", std_dev, (int, float)) - - if context.get_context('device_target') not in supported_device: - raise KeyError(f"For 'export', quant export only support {supported_device} device target now, " - f"but got {context.get_context('device_target')}") - - if file_format not in supported_formats: - raise ValueError(f"For 'export', quant export only support 'file_format' {supported_formats}, " - f"but got {file_format}.") - - quant_net.set_train(False) - if file_format == "MINDIR": - exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs, is_mindir=True) - else: - exporter = quant_export.ExportToQuantInferNetwork(quant_net, mean, std_dev, *inputs) - deploy_net = exporter.run() - return deploy_net - - def parse_print(print_file_name): """ Parse data file generated by mindspore.ops.Print. diff --git a/tests/st/quantization/lenet_quant/config.py b/tests/st/quantization/lenet_quant/config.py deleted file mode 100644 index 1106edfa6da..00000000000 --- a/tests/st/quantization/lenet_quant/config.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -network config setting, will be used in test_lenet_quant.py -""" - -from easydict import EasyDict as edict - -quant_cfg = edict({ - 'num_classes': 10, - 'lr': 0.01, - 'momentum': 0.9, - 'epoch_size': 10, - 'batch_size': 64, - 'buffer_size': 1000, - 'image_height': 32, - 'image_width': 32, - 'keep_checkpoint_max': 10, -}) diff --git a/tests/st/quantization/lenet_quant/dataset.py b/tests/st/quantization/lenet_quant/dataset.py deleted file mode 100644 index 164825686b2..00000000000 --- a/tests/st/quantization/lenet_quant/dataset.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Produce the dataset -""" - -import mindspore.dataset as ds -import mindspore.dataset.vision as CV -import mindspore.dataset.transforms as C -from mindspore.dataset.vision import Inter -from mindspore.common import dtype as mstype - - -def create_dataset(data_path, batch_size=32, repeat_size=1, - num_parallel_workers=1): - """ - create dataset for train or test - """ - # define dataset - mnist_ds = ds.MnistDataset(data_path) - - resize_height, resize_width = 32, 32 - rescale = 1.0 / 255.0 - shift = 0.0 - rescale_nml = 1 / 0.3081 - shift_nml = -1 * 0.1307 / 0.3081 - - # define map operations - resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode - rescale_nml_op = CV.Rescale(rescale_nml, shift_nml) - rescale_op = CV.Rescale(rescale, shift) - hwc2chw_op = CV.HWC2CHW() - type_cast_op = C.TypeCast(mstype.int32) - - # apply map operations on images - mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) - mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) - - # apply DatasetOps - buffer_size = 10000 - mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script - mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) - mnist_ds = mnist_ds.repeat(repeat_size) - - return mnist_ds diff --git 
a/tests/st/quantization/lenet_quant/lenet_fusion.py b/tests/st/quantization/lenet_quant/lenet_fusion.py deleted file mode 100644 index 88b35935027..00000000000 --- a/tests/st/quantization/lenet_quant/lenet_fusion.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""LeNet.""" -import mindspore.nn as nn - - -class LeNet5(nn.Cell): - """ - Lenet network - - Args: - num_class (int): Num classes. Default: 10. - - Returns: - Tensor, output tensor - Examples: - >>> LeNet(num_class=10) - - """ - - def __init__(self, num_class=10, channel=1): - super(LeNet5, self).__init__() - self.type = "fusion" - self.num_class = num_class - - # change `nn.Conv2d` to `nn.Conv2dBnAct` - self.conv1 = nn.Conv2dBnAct(channel, 6, 5, pad_mode='valid', activation='relu') - self.conv2 = nn.Conv2dBnAct(6, 16, 5, pad_mode='valid', activation='relu') - # change `nn.Dense` to `nn.DenseBnAct` - self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') - self.fc2 = nn.DenseBnAct(120, 84, activation='relu') - self.fc3 = nn.DenseBnAct(84, self.num_class) - - self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - self.flatten = nn.Flatten() - - def construct(self, x): - x = self.conv1(x) - x = self.max_pool2d(x) - x = self.conv2(x) - x = self.max_pool2d(x) - x = self.flatten(x) - x = self.fc1(x) - x = self.fc2(x) - x = self.fc3(x) - return x diff --git a/tests/st/quantization/lenet_quant/test_lenet_quant.py b/tests/st/quantization/lenet_quant/test_lenet_quant.py deleted file mode 100644 index f52d04c5375..00000000000 --- a/tests/st/quantization/lenet_quant/test_lenet_quant.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -train and infer lenet quantization network -""" - -import os -import pytest -from mindspore import context -from mindspore import Tensor -from mindspore.common import dtype as mstype -import mindspore.nn as nn -from mindspore.train.metrics import Accuracy -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor -from mindspore import load_checkpoint, load_param_into_net, export -from mindspore.train import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore.compression.quant.quantizer import OptimizeOption -from mindspore.compression.quant.quant_utils import load_nonquant_param_into_quant_net -from dataset import create_dataset -from config import quant_cfg -from lenet_fusion import LeNet5 as LeNet5Fusion -import numpy as np - -data_path = "/home/workspace/mindspore_dataset/mnist" -lenet_ckpt_path = "/home/workspace/mindspore_dataset/checkpoint/lenet/ckpt_lenet_noquant-10_1875.ckpt" - -def train_lenet_quant(optim_option="QAT"): - cfg = quant_cfg - ckpt_path = lenet_ckpt_path - ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1) - step_size = ds_train.get_dataset_size() - - # define fusion network - network = LeNet5Fusion(cfg.num_classes) - - # load quantization aware network checkpoint - param_dict = load_checkpoint(ckpt_path) - load_nonquant_param_into_quant_net(network, param_dict) - - # convert fusion network to quantization aware network - if optim_option == "LEARNED_SCALE": - quant_optim_otions = OptimizeOption.LEARNED_SCALE - quantizer = QuantizationAwareTraining(bn_fold=False, - per_channel=[True, False], - symmetric=[True, True], - narrow_range=[True, True], - freeze_bn=0, - quant_delay=0, - one_conv_fold=True, - optimize_option=quant_optim_otions) - else: - quantizer = QuantizationAwareTraining(quant_delay=900, - bn_fold=False, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # define network loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") - # define network optimization - net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) - - # call back and monitor - config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, - keep_checkpoint_max=cfg.keep_checkpoint_max) - ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant"+optim_option, config=config_ckpt) - - # define model - model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) - - print("============== Starting Training ==============") - model.train(cfg['epoch_size'], ds_train, callbacks=[ckpt_callback, LossMonitor()], - dataset_sink_mode=True) - print("============== End Training ==============") - - -def eval_quant(optim_option="QAT"): - cfg = quant_cfg - ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1) - ckpt_path = './ckpt_lenet_quant'+optim_option+'-10_937.ckpt' - # define fusion network - network = LeNet5Fusion(cfg.num_classes) - # convert fusion network to quantization aware network - if optim_option == "LEARNED_SCALE": - quant_optim_otions = OptimizeOption.LEARNED_SCALE - quantizer = QuantizationAwareTraining(bn_fold=False, - per_channel=[True, False], - symmetric=[True, True], - narrow_range=[True, True], - freeze_bn=0, - quant_delay=0, - one_conv_fold=True, - optimize_option=quant_optim_otions) - else: - quantizer = QuantizationAwareTraining(quant_delay=0, - bn_fold=False, - 
freeze_bn=10000, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # define loss - net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") - # define network optimization - net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum) - - # call back and monitor - model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) - - # load quantization aware network checkpoint - param_dict = load_checkpoint(ckpt_path) - not_load_param = load_param_into_net(network, param_dict) - if not_load_param: - raise ValueError("Load param into net fail!") - - print("============== Starting Testing ==============") - acc = model.eval(ds_eval, dataset_sink_mode=True) - print("============== {} ==============".format(acc)) - assert acc['Accuracy'] > 0.98 - - -def export_lenet(optim_option="QAT", file_format="MINDIR"): - cfg = quant_cfg - # define fusion network - network = LeNet5Fusion(cfg.num_classes) - # convert fusion network to quantization aware network - if optim_option == "LEARNED_SCALE": - quant_optim_otions = OptimizeOption.LEARNED_SCALE - quantizer = QuantizationAwareTraining(bn_fold=False, - per_channel=[True, False], - symmetric=[True, True], - narrow_range=[True, True], - freeze_bn=0, - quant_delay=0, - one_conv_fold=True, - optimize_option=quant_optim_otions) - else: - quantizer = QuantizationAwareTraining(quant_delay=0, - bn_fold=False, - freeze_bn=10000, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # export network - inputs = Tensor(np.ones([1, 1, cfg.image_height, cfg.image_width]), mstype.float32) - export(network, inputs, file_name="lenet_quant", file_format=file_format, quant_mode='AUTO') - - -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_lenet_quant(): - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - train_lenet_quant() - eval_quant() - export_lenet() - train_lenet_quant(optim_option="LEARNED_SCALE") - eval_quant(optim_option="LEARNED_SCALE") - export_lenet(optim_option="LEARNED_SCALE") - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_lenet_quant_ascend(): - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - train_lenet_quant(optim_option="LEARNED_SCALE") - eval_quant(optim_option="LEARNED_SCALE") - export_lenet(optim_option="LEARNED_SCALE", file_format="AIR") - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_lenet_quant_ascend_pynative(): - """ - test_lenet_quant_ascend_pynative - Features: test_lenet_quant_ascend_pynative - Description: test_lenet_quant_ascend_pynative pynative mode - Expectation: None - """ - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") - train_lenet_quant(optim_option="QAT") diff --git a/tests/st/quantization/mobilenetv2_quant/dataset.py b/tests/st/quantization/mobilenetv2_quant/dataset.py deleted file mode 100644 index ee2802fa86a..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/dataset.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" create train dataset. """ - -from functools import partial -import mindspore.dataset as ds -import mindspore.common.dtype as mstype -import mindspore.dataset.vision as C -import mindspore.dataset.transforms as C2 - - -def create_dataset(dataset_path, config, repeat_num=1, batch_size=32): - """ - create a train dataset - - Args: - dataset_path(string): the path of dataset. - config(EasyDict):the basic config for training - repeat_num(int): the repeat times of dataset. Default: 1. - batch_size(int): the batch size of dataset. Default: 32. - - Returns: - dataset - """ - - load_func = partial(ds.Cifar10Dataset, dataset_path) - cifar_ds = load_func(num_parallel_workers=8, shuffle=False) - - resize_height = config.image_height - resize_width = config.image_width - rescale = 1.0 / 255.0 - shift = 0.0 - - # define map operations - # interpolation default BILINEAR - resize_op = C.Resize((resize_height, resize_width)) - rescale_op = C.Rescale(rescale, shift) - normalize_op = C.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) - changeswap_op = C.HWC2CHW() - type_cast_op = C2.TypeCast(mstype.int32) - - c_trans = [resize_op, rescale_op, normalize_op, changeswap_op] - - # apply map operations on images - cifar_ds = cifar_ds.map(input_columns="label", operations=type_cast_op) - cifar_ds = cifar_ds.map(input_columns="image", operations=c_trans) - - # apply batch operations - cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True) - - # apply dataset repeat operation - cifar_ds = cifar_ds.repeat(repeat_num) - - return cifar_ds diff --git a/tests/st/quantization/mobilenetv2_quant/lr_generator.py b/tests/st/quantization/mobilenetv2_quant/lr_generator.py deleted file mode 100644 index bc6ff8106e2..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/lr_generator.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""learning rate generator""" - -import math -import numpy as np - - -def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): - """ - generate learning rate array - - Args: - global_step(int): total steps of the training - lr_init(float): init learning rate - lr_end(float): end learning rate - lr_max(float): max learning rate - warmup_epochs(int): number of warmup epochs - total_epochs(int): total epoch of training - steps_per_epoch(int): steps of one epoch - - Returns: - np.array, learning rate array - """ - lr_each_step = [] - total_steps = steps_per_epoch * total_epochs - warmup_steps = steps_per_epoch * warmup_epochs - for i in range(total_steps): - if i < warmup_steps: - lr = lr_init + (lr_max - lr_init) * i / warmup_steps - else: - lr = lr_end + \ - (lr_max - lr_end) * \ - (1. + math.cos(math.pi * (i - warmup_steps) / - (total_steps - warmup_steps))) / 2. - if lr < 0.0: - lr = 0.0 - lr_each_step.append(lr) - - current_step = global_step - lr_each_step = np.array(lr_each_step).astype(np.float32) - learning_rate = lr_each_step[current_step:] - - return learning_rate diff --git a/tests/st/quantization/mobilenetv2_quant/mobilenetV2.py b/tests/st/quantization/mobilenetv2_quant/mobilenetV2.py deleted file mode 100644 index bd176021b29..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/mobilenetV2.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""MobileNetV2 Quant model define""" - -import numpy as np - -import mindspore.nn as nn -from mindspore.ops import operations as P -from mindspore import Tensor - -__all__ = ['mobilenetV2'] - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class GlobalAvgPooling(nn.Cell): - """ - Global avg pooling definition. - - Args: - - Returns: - Tensor, output tensor. - - Examples: - >>> GlobalAvgPooling() - """ - - def __init__(self): - super(GlobalAvgPooling, self).__init__() - self.mean = P.ReduceMean(keep_dims=False) - - def construct(self, x): - x = self.mean(x, (2, 3)) - return x - - -class ConvBNReLU(nn.Cell): - """ - Convolution/Depthwise fused with Batchnorm and ReLU block definition. - - Args: - in_planes (int): Input channel. - out_planes (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size for the first convolutional layer. Default: 1. - groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. - - Returns: - Tensor, output tensor. 
- - Examples: - >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) - """ - - def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - super(ConvBNReLU, self).__init__() - padding = (kernel_size - 1) // 2 - self.conv = nn.Conv2dBnAct(in_planes, out_planes, kernel_size, - stride=stride, - pad_mode='pad', - padding=padding, - group=groups, - has_bn=True, - activation='relu') - - def construct(self, x): - x = self.conv(x) - return x - - -class InvertedResidual(nn.Cell): - """ - Mobilenetv2 residual block definition. - - Args: - inp (int): Input channel. - oup (int): Output channel. - stride (int): Stride size for the first convolutional layer. Default: 1. - expand_ratio (int): expand ration of input channel - - Returns: - Tensor, output tensor. - - Examples: - >>> ResidualBlock(3, 256, 1, 1) - """ - - def __init__(self, inp, oup, stride, expand_ratio): - super(InvertedResidual, self).__init__() - assert stride in [1, 2] - - hidden_dim = int(round(inp * expand_ratio)) - self.use_res_connect = stride == 1 and inp == oup - - layers = [] - if expand_ratio != 1: - layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) - layers.extend([ - # dw - ConvBNReLU(hidden_dim, hidden_dim, - stride=stride, groups=hidden_dim), - # pw-linear - nn.Conv2dBnAct(hidden_dim, oup, kernel_size=1, stride=1, - pad_mode='pad', padding=0, group=1, has_bn=True) - ]) - self.conv = nn.SequentialCell(layers) - self.add = P.Add() - - def construct(self, x): - out = self.conv(x) - if self.use_res_connect: - out = self.add(out, x) - return out - - -class mobilenetV2(nn.Cell): - """ - mobilenetV2 fusion architecture. - - Args: - class_num (Cell): number of classes. - width_mult (int): Channels multiplier for round to 8/16 and others. Default is 1. - has_dropout (bool): Is dropout used. Default is false - inverted_residual_setting (list): Inverted residual settings. Default is None - round_nearest (list): Channel round to . Default is 8 - Returns: - Tensor, output tensor. 
- - Examples: - >>> mobilenetV2(num_classes=1000) - """ - - def __init__(self, num_classes=1000, width_mult=1., - has_dropout=False, inverted_residual_setting=None, round_nearest=8): - super(mobilenetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - # setting of inverted residual blocks - self.cfgs = inverted_residual_setting - if inverted_residual_setting is None: - self.cfgs = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 320, 1, 1], - ] - - # building first layer - input_channel = _make_divisible( - input_channel * width_mult, round_nearest) - self.out_channels = _make_divisible( - last_channel * max(1.0, width_mult), round_nearest) - - features = [ConvBNReLU(3, input_channel, stride=2)] - # building inverted residual blocks - for t, c, n, s in self.cfgs: - output_channel = _make_divisible(c * width_mult, round_nearest) - for i in range(n): - stride = s if i == 0 else 1 - features.append( - block(input_channel, output_channel, stride, expand_ratio=t)) - input_channel = output_channel - # building last several layers - features.append(ConvBNReLU( - input_channel, self.out_channels, kernel_size=1)) - # make it nn.CellList - self.features = nn.SequentialCell(features) - # mobilenet head - head = ([GlobalAvgPooling(), - nn.DenseBnAct(self.out_channels, num_classes, - has_bias=True, has_bn=False) - ] if not has_dropout else - [GlobalAvgPooling(), - nn.Dropout(0.2), - nn.DenseBnAct(self.out_channels, num_classes, - has_bias=True, has_bn=False) - ]) - self.head = nn.SequentialCell(head) - - # init weights - self.init_parameters_data() - self._initialize_weights() - - def construct(self, x): - x = self.features(x) - x = self.head(x) - return x - - def _initialize_weights(self): - """ - Initialize weights. - - Args: - - Returns: - None. - - Examples: - >>> _initialize_weights() - """ - self.init_parameters_data() - for _, m in self.cells_and_names(): - np.random.seed(1) - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - w = Tensor(np.random.normal(0, np.sqrt(2. / n), - m.weight.data.shape).astype("float32")) - m.weight.set_data(w) - if m.bias is not None: - m.bias.set_data( - Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) - elif isinstance(m, nn.Conv2dBnAct): - n = m.conv.kernel_size[0] * \ - m.conv.kernel_size[1] * m.conv.out_channels - w = Tensor(np.random.normal(0, np.sqrt(2. 
/ n), - m.conv.weight.data.shape).astype("float32")) - m.conv.weight.set_data(w) - if m.conv.bias is not None: - m.conv.bias.set_data( - Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32"))) - elif isinstance(m, nn.BatchNorm2d): - m.gamma.set_data( - Tensor(np.ones(m.gamma.data.shape, dtype="float32"))) - m.beta.set_data( - Tensor(np.zeros(m.beta.data.shape, dtype="float32"))) - elif isinstance(m, nn.Dense): - m.weight.set_data(Tensor(np.random.normal( - 0, 0.01, m.weight.data.shape).astype("float32"))) - if m.bias is not None: - m.bias.set_data( - Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) - elif isinstance(m, nn.DenseBnAct): - m.dense.weight.set_data( - Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32"))) - if m.dense.bias is not None: - m.dense.bias.set_data( - Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32"))) diff --git a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py b/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py deleted file mode 100644 index 90b4162d54c..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Train Mobilenetv2_quant on Cifar10""" - - -import pytest -import numpy as np -from easydict import EasyDict as ed - -from mindspore import context -from mindspore import Tensor -from mindspore import nn -from mindspore.train.model import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore.common import set_seed - -from dataset import create_dataset -from lr_generator import get_lr -from utils import Monitor, CrossEntropyWithLabelSmooth -from mobilenetV2 import mobilenetV2 - -config_ascend_quant = ed({ - "num_classes": 10, - "image_height": 224, - "image_width": 224, - "batch_size": 200, - "step_threshold": 10, - "data_load_mode": "mindata", - "epoch_size": 1, - "start_epoch": 200, - "warmup_epochs": 1, - "lr": 0.3, - "momentum": 0.9, - "weight_decay": 4e-5, - "label_smooth": 0.1, - "loss_scale": 1024, - "save_checkpoint": True, - "save_checkpoint_epochs": 1, - "keep_checkpoint_max": 300, - "save_checkpoint_path": "./checkpoint", -}) - -dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/" - - -def train(): - """train""" - config = config_ascend_quant - print("training configure: {}".format(config)) - - epoch_size = config.epoch_size - - # define network - network = mobilenetV2(num_classes=config.num_classes) - # define loss - if config.label_smooth > 0: - loss = CrossEntropyWithLabelSmooth( - smooth_factor=config.label_smooth, num_classes=config.num_classes) - else: - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - # define dataset - dataset = create_dataset(dataset_path=dataset_path, - config=config, - repeat_num=1, - batch_size=config.batch_size) - step_size = dataset.get_dataset_size() - - # convert fusion network to quantization aware network - quantizer = QuantizationAwareTraining(bn_fold=True, - per_channel=[True, False], - symmetric=[True, False]) - network = quantizer.quantize(network) - - # get learning rate - lr = Tensor(get_lr(global_step=config.start_epoch * step_size, - lr_init=0, - lr_end=0, - lr_max=config.lr, - warmup_epochs=config.warmup_epochs, - total_epochs=epoch_size + config.start_epoch, - steps_per_epoch=step_size)) - - # define optimization - opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum, - config.weight_decay) - # define model - model = Model(network, loss_fn=loss, optimizer=opt) - - print("============== Starting Training ==============") - monitor = Monitor(lr_init=lr.asnumpy(), - step_threshold=config.step_threshold) - callback = [monitor] - model.train(epoch_size, dataset, callbacks=callback, - dataset_sink_mode=False) - print("============== End Training ==============") - - export_time_used = 650 - train_time = monitor.step_mseconds - print('train_time_used:{}'.format(train_time)) - assert train_time < export_time_used - expect_avg_step_loss = 2.32 - avg_step_loss = np.mean(np.array(monitor.losses)) - print("average step loss:{}".format(avg_step_loss)) - assert avg_step_loss < expect_avg_step_loss - - -@pytest.mark.level0 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_single -def test_mobilenetv2_quant(): - """ - test_mobilenetv2_quant - Features: test_mobilenetv2_quant - Description: test_mobilenetv2_quant graph mode - Expectation: None - """ - set_seed(1) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - train() - - -if __name__ == '__main__': - 
test_mobilenetv2_quant() diff --git a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py b/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py deleted file mode 100644 index a6dfca916f8..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/test_mobilenetv2_quant_gpu.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Train Mobilenetv2_quant gpu on Cifar10""" - - -import pytest -import numpy as np -from easydict import EasyDict as ed - -from mindspore import context -from mindspore import Tensor -from mindspore import nn -from mindspore.train.model import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore.common import set_seed - -from dataset import create_dataset -from lr_generator import get_lr -from utils import Monitor, CrossEntropyWithLabelSmooth -from mobilenetV2 import mobilenetV2 - -config_ascend_quant = ed({ - "num_classes": 10, - "image_height": 224, - "image_width": 224, - "batch_size": 300, - "step_threshold": 10, - "data_load_mode": "mindata", - "epoch_size": 1, - "start_epoch": 200, - "warmup_epochs": 1, - "lr": 0.05, - "momentum": 0.997, - "weight_decay": 4e-5, - "label_smooth": 0.1, - "loss_scale": 1024, - "save_checkpoint": True, - "save_checkpoint_epochs": 1, - "keep_checkpoint_max": 300, - "save_checkpoint_path": "./checkpoint", -}) - -dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/" - -@pytest.mark.level2 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_single -def test_mobilenetv2_quant(): - set_seed(1) - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - config = config_ascend_quant - print("training configure: {}".format(config)) - - epoch_size = config.epoch_size - - # define network - network = mobilenetV2(num_classes=config.num_classes) - # define loss - if config.label_smooth > 0: - loss = CrossEntropyWithLabelSmooth( - smooth_factor=config.label_smooth, num_classes=config.num_classes) - else: - loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') - # define dataset - dataset = create_dataset(dataset_path=dataset_path, - config=config, - repeat_num=1, - batch_size=config.batch_size) - step_size = dataset.get_dataset_size() - - # convert fusion network to quantization aware network - quantizer = QuantizationAwareTraining(bn_fold=True, - per_channel=[True, False], - symmetric=[False, False]) - network = quantizer.quantize(network) - - # get learning rate - lr = Tensor(get_lr(global_step=config.start_epoch * step_size, - lr_init=0, - lr_end=0, - lr_max=config.lr, - warmup_epochs=config.warmup_epochs, - total_epochs=epoch_size + config.start_epoch, - steps_per_epoch=step_size)) - - # define optimization - opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum, - config.weight_decay) - # define model - model = Model(network, 
loss_fn=loss, optimizer=opt) - - print("============== Starting Training ==============") - monitor = Monitor(lr_init=lr.asnumpy(), - step_threshold=config.step_threshold) - callback = [monitor] - model.train(epoch_size, dataset, callbacks=callback, - dataset_sink_mode=False) - print("============== End Training ==============") - train_time = monitor.step_mseconds - print('train_time_used:{}'.format(train_time)) - avg_step_loss = np.mean(np.array(monitor.losses)) - print("average step loss:{}".format(avg_step_loss)) - expect_avg_step_loss = 2.32 - assert avg_step_loss < expect_avg_step_loss - export_time_used = 960 - assert train_time < export_time_used - -if __name__ == '__main__': - test_mobilenetv2_quant() diff --git a/tests/st/quantization/mobilenetv2_quant/utils.py b/tests/st/quantization/mobilenetv2_quant/utils.py deleted file mode 100644 index 2849cb211e3..00000000000 --- a/tests/st/quantization/mobilenetv2_quant/utils.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""MobileNetV2 utils""" - -import time -import numpy as np - -from mindspore.train.callback import Callback -from mindspore import Tensor -from mindspore import nn -from mindspore.nn.loss.loss import LossBase -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.common import dtype as mstype - - -class Monitor(Callback): - """ - Monitor loss and time. 
- - Args: - lr_init (numpy array): train lr - - Returns: - None - - Examples: - >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy()) - """ - - def __init__(self, lr_init=None, step_threshold=10): - super(Monitor, self).__init__() - self.lr_init = lr_init - self.lr_init_len = len(lr_init) - self.step_threshold = step_threshold - self.step_mseconds = 50000 - - def epoch_begin(self, run_context): - self.losses = [] - self.epoch_time = time.time() - - def epoch_end(self, run_context): - cb_params = run_context.original_args() - - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / cb_params.batch_num - print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds, - per_step_mseconds, - np.mean(self.losses))) - self.epoch_mseconds = epoch_mseconds - - def step_begin(self, run_context): - self.step_time = time.time() - - def step_end(self, run_context): - cb_params = run_context.original_args() - step_mseconds = (time.time() - self.step_time) * 1000 - self.step_mseconds = min(self.step_mseconds, step_mseconds) - step_loss = cb_params.net_outputs - - if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): - step_loss = step_loss[0] - if isinstance(step_loss, Tensor): - step_loss = np.mean(step_loss.asnumpy()) - - self.losses.append(step_loss) - cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num - - print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format( - cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch + - 1, cb_params.batch_num, step_loss, - np.mean(self.losses), self.step_mseconds, self.lr_init[cb_params.cur_step_num - 1])) - - if cb_params.cur_step_num == self.step_threshold: - run_context.request_stop() - - -class CrossEntropyWithLabelSmooth(LossBase): - """ - CrossEntropyWith LabelSmooth. - - Args: - smooth_factor (float): smooth factor, default=0. - num_classes (int): num classes - - Returns: - None. 
- - Examples: - >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000) - """ - - def __init__(self, smooth_factor=0., num_classes=1000): - super(CrossEntropyWithLabelSmooth, self).__init__() - self.onehot = P.OneHot() - self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) - self.off_value = Tensor(1.0 * smooth_factor / - (num_classes - 1), mstype.float32) - self.ce = nn.SoftmaxCrossEntropyWithLogits() - self.mean = P.ReduceMean(False) - self.cast = P.Cast() - - def construct(self, logit, label): - one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1], - self.on_value, self.off_value) - out_loss = self.ce(logit, one_hot_label) - out_loss = self.mean(out_loss, 0) - return out_loss diff --git a/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py b/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py index 10e9054f3c5..f8b3507fa5b 100644 --- a/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py +++ b/tests/st/quantization/ops/test_Conv2dBnFoldQuant.py @@ -22,20 +22,18 @@ from mindspore import nn from mindspore import context from mindspore import Tensor from mindspore.common import set_seed -from mindspore.compression.quant import create_quant_config class Net(nn.Cell): - def __init__(self, qconfig): + def __init__(self): super(Net, self).__init__() - self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), - pad_mode='valid', quant_config=qconfig) + self.conv = nn.Conv2dBnFoldQuant(2, 3, kernel_size=(2, 2), stride=(1, 1), pad_mode='valid') def construct(self, x): return self.conv(x) + def test_conv2d_bn_fold_quant(): set_seed(1) - quant_config = create_quant_config() - network = Net(quant_config) + network = Net() inputs = Tensor(np.ones([1, 2, 5, 5]).astype(np.float32)) label = Tensor(np.ones([1, 3, 4, 4]).astype(np.int32)) opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), learning_rate=0.1, momentum=0.9) @@ -44,11 +42,13 @@ def test_conv2d_bn_fold_quant(): train_network = nn.TrainOneStepCell(net_with_loss, opt) train_network.set_train() out_loss = train_network(inputs, label) + print("------------------", out_loss.asnumpy()) expect_loss = np.array([0.940427]) error = np.array([0.1]) diff = out_loss.asnumpy() - expect_loss assert np.all(abs(diff) < error) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training diff --git a/tests/st/quantization/resnet50_quant/dataset.py b/tests/st/quantization/resnet50_quant/dataset.py deleted file mode 100755 index 69a1f774819..00000000000 --- a/tests/st/quantization/resnet50_quant/dataset.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2020-2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" create train dataset. 
""" - -from functools import partial - -import mindspore.common.dtype as mstype -import mindspore.dataset as ds -import mindspore.dataset.transforms as C2 -import mindspore.dataset.vision as C - - -def create_dataset(dataset_path, config, repeat_num=1, batch_size=32): - """ - create a train dataset - - Args: - dataset_path(string): the path of dataset. - config(EasyDict):the basic config for training - repeat_num(int): the repeat times of dataset. Default: 1. - batch_size(int): the batch size of dataset. Default: 32. - - Returns: - dataset - """ - - load_func = partial(ds.Cifar10Dataset, dataset_path) - data_set = load_func(num_parallel_workers=8, shuffle=False) - - resize_height = config.image_height - resize_width = config.image_width - - mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] - std = [0.229 * 255, 0.224 * 255, 0.225 * 255] - - # define map operations - resize_op = C.Resize((resize_height, resize_width)) - normalize_op = C.Normalize(mean=mean, std=std) - changeswap_op = C.HWC2CHW() - c_trans = [resize_op, normalize_op, changeswap_op] - - type_cast_op = C2.TypeCast(mstype.int32) - - data_set = data_set.map(operations=c_trans, input_columns="image", - num_parallel_workers=8) - data_set = data_set.map(operations=type_cast_op, - input_columns="label", num_parallel_workers=8) - - # apply batch operations - data_set = data_set.batch(batch_size, drop_remainder=True) - - # apply dataset repeat operation - data_set = data_set.repeat(repeat_num) - - return data_set diff --git a/tests/st/quantization/resnet50_quant/lr_generator.py b/tests/st/quantization/resnet50_quant/lr_generator.py deleted file mode 100755 index fe2a971ebfc..00000000000 --- a/tests/st/quantization/resnet50_quant/lr_generator.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""learning rate generator""" - -import math -import numpy as np - - -def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode): - """ - generate learning rate array - - Args: - lr_init(float): init learning rate - lr_end(float): end learning rate - lr_max(float): max learning rate - warmup_epochs(int): number of warmup epochs - total_epochs(int): total epoch of training - steps_per_epoch(int): steps of one epoch - lr_decay_mode(string): learning rate decay mode, including steps, poly, cosine or default - - Returns: - np.array, learning rate array - """ - lr_each_step = [] - total_steps = steps_per_epoch * total_epochs - warmup_steps = steps_per_epoch * warmup_epochs - if lr_decay_mode == 'steps': - decay_epoch_index = [0.3 * total_steps, - 0.6 * total_steps, 0.8 * total_steps] - for i in range(total_steps): - if i < decay_epoch_index[0]: - lr = lr_max - elif i < decay_epoch_index[1]: - lr = lr_max * 0.1 - elif i < decay_epoch_index[2]: - lr = lr_max * 0.01 - else: - lr = lr_max * 0.001 - lr_each_step.append(lr) - elif lr_decay_mode == 'poly': - if warmup_steps != 0: - inc_each_step = (float(lr_max) - float(lr_init)) / \ - float(warmup_steps) - else: - inc_each_step = 0 - for i in range(total_steps): - if i < warmup_steps: - lr = float(lr_init) + inc_each_step * float(i) - else: - base = (1.0 - (float(i) - float(warmup_steps)) / - (float(total_steps) - float(warmup_steps))) - lr = float(lr_max) * base * base - if lr < 0.0: - lr = 0.0 - lr_each_step.append(lr) - elif lr_decay_mode == 'cosine': - decay_steps = total_steps - warmup_steps - for i in range(total_steps): - if i < warmup_steps: - lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps) - lr = float(lr_init) + lr_inc * (i + 1) - else: - linear_decay = (total_steps - i) / decay_steps - cosine_decay = 0.5 * \ - (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps)) - decayed = linear_decay * cosine_decay + 0.00001 - lr = lr_max * decayed - lr_each_step.append(lr) - else: - for i in range(total_steps): - if i < warmup_steps: - lr = lr_init + (lr_max - lr_init) * i / warmup_steps - else: - lr = lr_max - (lr_max - lr_end) * \ - (i - warmup_steps) / (total_steps - warmup_steps) - lr_each_step.append(lr) - - learning_rate = np.array(lr_each_step).astype(np.float32) - - return learning_rate diff --git a/tests/st/quantization/resnet50_quant/resnet_quant_manual.py b/tests/st/quantization/resnet50_quant/resnet_quant_manual.py deleted file mode 100644 index 32693785c24..00000000000 --- a/tests/st/quantization/resnet50_quant/resnet_quant_manual.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""ResNet.""" -import numpy as np -import mindspore.nn as nn -import mindspore.common.initializer as weight_init -from mindspore.ops import operations as P -from mindspore import Tensor -from mindspore.nn import FakeQuantWithMinMaxObserver, Conv2dBnFoldQuant -from mindspore.compression.quant import create_quant_config - -_ema_decay = 0.999 -_symmetric = True -_fake = True -_per_channel = True -_quant_config = create_quant_config(per_channel=(_per_channel, False), symmetric=(_symmetric, False)) - - -def _weight_variable(shape, factor=0.01): - init_value = np.random.randn(*shape).astype(np.float32) * factor - return Tensor(init_value) - - -def _conv3x3(in_channel, out_channel, stride=1): - weight_shape = (out_channel, in_channel, 3, 3) - weight = _weight_variable(weight_shape) - return nn.Conv2d(in_channel, out_channel, - kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight) - - -def _conv1x1(in_channel, out_channel, stride=1): - weight_shape = (out_channel, in_channel, 1, 1) - weight = _weight_variable(weight_shape) - return nn.Conv2d(in_channel, out_channel, - kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight) - - -def _conv7x7(in_channel, out_channel, stride=1): - weight_shape = (out_channel, in_channel, 7, 7) - weight = _weight_variable(weight_shape) - return nn.Conv2d(in_channel, out_channel, - kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) - - -def _bn(channel): - return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, - gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) - - -def _bn_last(channel): - return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, - gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1) - - -def _fc(in_channel, out_channel): - weight_shape = (out_channel, in_channel) - weight = _weight_variable(weight_shape) - return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) - - -class ConvBNReLU(nn.Cell): - """ - Convolution/Depthwise fused with Batchnorm and ReLU block definition. - - Args: - in_planes (int): Input channel. - out_planes (int): Output channel. - kernel_size (int): Input kernel size. - stride (int): Stride size for the first convolutional layer. Default: 1. - groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1. - - Returns: - Tensor, output tensor. - - Examples: - >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) - """ - - def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - super(ConvBNReLU, self).__init__() - padding = (kernel_size - 1) // 2 - conv = Conv2dBnFoldQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding, - group=groups, fake=_fake, quant_config=_quant_config) - layers = [conv, nn.ActQuant(nn.ReLU())] if _fake else [conv, nn.ReLU()] - self.features = nn.SequentialCell(layers) - - def construct(self, x): - output = self.features(x) - return output - - -class ResidualBlock(nn.Cell): - """ - ResNet V1 residual block definition. - - Args: - in_channel (int): Input channel. - out_channel (int): Output channel. - stride (int): Stride size for the first convolutional layer. Default: 1. - - Returns: - Tensor, output tensor. 
- - Examples: - >>> ResidualBlock(3, 256, stride=2) - """ - expansion = 4 - - def __init__(self, - in_channel, - out_channel, - stride=1): - super(ResidualBlock, self).__init__() - - channel = out_channel // self.expansion - self.conv1 = ConvBNReLU(in_channel, channel, kernel_size=1, stride=1) - self.conv2 = ConvBNReLU(channel, channel, kernel_size=3, stride=stride) - self.conv3 = nn.SequentialCell([Conv2dBnFoldQuant(channel, out_channel, fake=_fake, - quant_config=_quant_config, - kernel_size=1, stride=1, pad_mode='same', padding=0), - FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, symmetric=False) - ]) if _fake else Conv2dBnFoldQuant(channel, out_channel, fake=_fake, - quant_config=_quant_config, - kernel_size=1, stride=1, - pad_mode='same', padding=0) - - self.down_sample = False - - if stride != 1 or in_channel != out_channel: - self.down_sample = True - self.down_sample_layer = None - - if self.down_sample: - self.down_sample_layer = nn.SequentialCell([Conv2dBnFoldQuant(in_channel, out_channel, - quant_config=_quant_config, - kernel_size=1, stride=stride, - pad_mode='same', padding=0), - FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay, - symmetric=False) - ]) if _fake else Conv2dBnFoldQuant(in_channel, out_channel, - fake=_fake, - quant_config=_quant_config, - kernel_size=1, - stride=stride, - pad_mode='same', - padding=0) - self.add = nn.TensorAddQuant() - self.relu = P.ReLU() - - def construct(self, x): - identity = x - out = self.conv1(x) - out = self.conv2(out) - out = self.conv3(out) - - if self.down_sample: - identity = self.down_sample_layer(identity) - - out = self.add(out, identity) - out = self.relu(out) - - return out - - -class ResNet(nn.Cell): - """ - ResNet architecture. - - Args: - block (Cell): Block for network. - layer_nums (list): Numbers of block in different layers. - in_channels (list): Input channel in each layer. - out_channels (list): Output channel in each layer. - strides (list): Stride size in each layer. - num_classes (int): The number of classes that the training images are belonging to. - Returns: - Tensor, output tensor. 
- - Examples: - >>> ResNet(ResidualBlock, - >>> [3, 4, 6, 3], - >>> [64, 256, 512, 1024], - >>> [256, 512, 1024, 2048], - >>> [1, 2, 2, 2], - >>> 10) - """ - - def __init__(self, - block, - layer_nums, - in_channels, - out_channels, - strides, - num_classes): - super(ResNet, self).__init__() - - if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: - raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") - - self.conv1 = ConvBNReLU(3, 64, kernel_size=7, stride=2) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") - - self.layer1 = self._make_layer(block, - layer_nums[0], - in_channel=in_channels[0], - out_channel=out_channels[0], - stride=strides[0]) - self.layer2 = self._make_layer(block, - layer_nums[1], - in_channel=in_channels[1], - out_channel=out_channels[1], - stride=strides[1]) - self.layer3 = self._make_layer(block, - layer_nums[2], - in_channel=in_channels[2], - out_channel=out_channels[2], - stride=strides[2]) - self.layer4 = self._make_layer(block, - layer_nums[3], - in_channel=in_channels[3], - out_channel=out_channels[3], - stride=strides[3]) - - self.mean = P.ReduceMean(keep_dims=True) - self.flatten = nn.Flatten() - self.end_point = nn.DenseQuant(out_channels[3], num_classes, has_bias=True, quant_config=_quant_config) - self.output_fake = nn.FakeQuantWithMinMaxObserver(ema=True, ema_decay=_ema_decay) - - # init weights - self._initialize_weights() - - def _make_layer(self, block, layer_num, in_channel, out_channel, stride): - """ - Make stage network of ResNet. - - Args: - block (Cell): Resnet block. - layer_num (int): Layer number. - in_channel (int): Input channel. - out_channel (int): Output channel. - stride (int): Stride size for the first convolutional layer. - - Returns: - SequentialCell, the output layer. - - Examples: - >>> _make_layer(ResidualBlock, 3, 128, 256, 2) - """ - layers = [] - - resnet_block = block(in_channel, out_channel, stride=stride) - layers.append(resnet_block) - - for _ in range(1, layer_num): - resnet_block = block(out_channel, out_channel, stride=1) - layers.append(resnet_block) - - return nn.SequentialCell(layers) - - def construct(self, x): - x = self.conv1(x) - c1 = self.maxpool(x) - - c2 = self.layer1(c1) - c3 = self.layer2(c2) - c4 = self.layer3(c3) - c5 = self.layer4(c4) - - out = self.mean(c5, (2, 3)) - out = self.flatten(out) - out = self.end_point(out) - out = self.output_fake(out) - return out - - def _initialize_weights(self): - - self.init_parameters_data() - for _, m in self.cells_and_names(): - np.random.seed(1) - - if isinstance(m, nn.Conv2dBnFoldQuant): - m.weight.set_data(weight_init.initializer(weight_init.Normal(), - m.weight.shape, - m.weight.dtype)) - elif isinstance(m, nn.DenseQuant): - m.weight.set_data(weight_init.initializer(weight_init.Normal(), - m.weight.shape, - m.weight.dtype)) - elif isinstance(m, nn.Conv2dBnWithoutFoldQuant): - m.weight.set_data(weight_init.initializer(weight_init.Normal(), - m.weight.shape, - m.weight.dtype)) - - -def resnet50_quant(class_num=10): - """ - Get ResNet50 neural network. - - Args: - class_num (int): Class number. - - Returns: - Cell, cell instance of ResNet50 neural network. - - Examples: - >>> net = resnet50_quant(10) - """ - return ResNet(ResidualBlock, - [3, 4, 6, 3], - [64, 256, 512, 1024], - [256, 512, 1024, 2048], - [1, 2, 2, 2], - class_num) - - -def resnet101_quant(class_num=1001): - """ - Get ResNet101 neural network. - - Args: - class_num (int): Class number. 
- - Returns: - Cell, cell instance of ResNet101 neural network. - - Examples: - >>> net = resnet101(1001) - """ - return ResNet(ResidualBlock, - [3, 4, 23, 3], - [64, 256, 512, 1024], - [256, 512, 1024, 2048], - [1, 2, 2, 2], - class_num) diff --git a/tests/st/quantization/resnet50_quant/test_resnet50_quant.py b/tests/st/quantization/resnet50_quant/test_resnet50_quant.py deleted file mode 100755 index d5a2971de23..00000000000 --- a/tests/st/quantization/resnet50_quant/test_resnet50_quant.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Train Resnet50_quant on Cifar10""" - -import pytest -import numpy as np -from easydict import EasyDict as ed - -from mindspore import context -from mindspore import Tensor -from mindspore.nn.optim.momentum import Momentum -from mindspore.train.model import Model -from mindspore.compression.quant import QuantizationAwareTraining -from mindspore import set_seed - -from resnet_quant_manual import resnet50_quant -from dataset import create_dataset -from lr_generator import get_lr -from utils import Monitor, CrossEntropy - - -config_quant = ed({ - "class_num": 10, - "batch_size": 128, - "step_threshold": 20, - "loss_scale": 1024, - "momentum": 0.9, - "weight_decay": 1e-4, - "epoch_size": 1, - "pretrained_epoch_size": 90, - "buffer_size": 1000, - "image_height": 224, - "image_width": 224, - "data_load_mode": "original", - "save_checkpoint": True, - "save_checkpoint_epochs": 1, - "keep_checkpoint_max": 50, - "save_checkpoint_path": "./", - "warmup_epochs": 0, - "lr_decay_mode": "cosine", - "use_label_smooth": True, - "label_smooth_factor": 0.1, - "lr_init": 0, - "lr_max": 0.005, -}) - -dataset_path = "/home/workspace/mindspore_dataset/cifar-10-batches-bin/" - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_resnet50_quant(): - set_seed(1) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") - config = config_quant - print("training configure: {}".format(config)) - epoch_size = config.epoch_size - - # define network - net = resnet50_quant(class_num=config.class_num) - net.set_train(True) - - # define loss - if not config.use_label_smooth: - config.label_smooth_factor = 0.0 - loss = CrossEntropy( - smooth_factor=config.label_smooth_factor, num_classes=config.class_num) - #loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) - - # define dataset - dataset = create_dataset(dataset_path=dataset_path, - config=config, - repeat_num=1, - batch_size=config.batch_size) - step_size = dataset.get_dataset_size() - - # convert fusion network to quantization aware network - quantizer = QuantizationAwareTraining(bn_fold=True, - per_channel=[True, False], - symmetric=[True, False]) - net = quantizer.quantize(net) - - # get learning rate - lr = Tensor(get_lr(lr_init=config.lr_init, - 
lr_end=0.0, - lr_max=config.lr_max, - warmup_epochs=config.warmup_epochs, - total_epochs=config.epoch_size, - steps_per_epoch=step_size, - lr_decay_mode='cosine')) - - # define optimization - opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, - config.weight_decay, config.loss_scale) - - # define model - #model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}) - model = Model(net, loss_fn=loss, optimizer=opt) - - print("============== Starting Training ==============") - monitor = Monitor(lr_init=lr.asnumpy(), - step_threshold=config.step_threshold) - - callbacks = [monitor] - model.train(epoch_size, dataset, callbacks=callbacks, - dataset_sink_mode=False) - print("============== End Training ==============") - - expect_avg_step_loss = 2.60 - avg_step_loss = np.mean(np.array(monitor.losses)) - - print("average step loss:{}".format(avg_step_loss)) - assert avg_step_loss < expect_avg_step_loss - - -if __name__ == '__main__': - test_resnet50_quant() diff --git a/tests/st/quantization/resnet50_quant/utils.py b/tests/st/quantization/resnet50_quant/utils.py deleted file mode 100644 index e3f3c26d864..00000000000 --- a/tests/st/quantization/resnet50_quant/utils.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Resnet50 utils""" - -import time -import numpy as np - -from mindspore.train.callback import Callback -from mindspore import Tensor -from mindspore import nn -from mindspore.nn.loss.loss import LossBase -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.common import dtype as mstype - - -class Monitor(Callback): - """ - Monitor loss and time. 
- - Args: - lr_init (numpy array): train lr - - Returns: - None - - Examples: - >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy()) - """ - - def __init__(self, lr_init=None, step_threshold=10): - super(Monitor, self).__init__() - self.lr_init = lr_init - self.lr_init_len = len(lr_init) - self.step_threshold = step_threshold - - def epoch_begin(self, run_context): - self.losses = [] - self.epoch_time = time.time() - - def epoch_end(self, run_context): - cb_params = run_context.original_args() - - epoch_mseconds = (time.time() - self.epoch_time) * 1000 - per_step_mseconds = epoch_mseconds / cb_params.batch_num - print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:8.6f}".format(epoch_mseconds, - per_step_mseconds, - np.mean(self.losses))) - self.epoch_mseconds = epoch_mseconds - - def step_begin(self, run_context): - self.step_time = time.time() - - def step_end(self, run_context): - cb_params = run_context.original_args() - step_mseconds = (time.time() - self.step_time) * 1000 - step_loss = cb_params.net_outputs - - if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): - step_loss = step_loss[0] - if isinstance(step_loss, Tensor): - step_loss = np.mean(step_loss.asnumpy()) - - self.losses.append(step_loss) - cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num - - print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:8.6f}/{:8.6f}], time:[{:5.3f}], lr:[{:5.5f}]".format( - cb_params.cur_epoch_num, cb_params.epoch_num, cur_step_in_epoch + - 1, cb_params.batch_num, step_loss, - np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1])) - - if cb_params.cur_step_num == self.step_threshold: - run_context.request_stop() - - -class CrossEntropy(LossBase): - """the redefined loss function with SoftmaxCrossEntropyWithLogits""" - - def __init__(self, smooth_factor=0, num_classes=1001): - super(CrossEntropy, self).__init__() - self.onehot = P.OneHot() - self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) - self.off_value = Tensor(1.0 * smooth_factor / - (num_classes - 1), mstype.float32) - self.ce = nn.SoftmaxCrossEntropyWithLogits() - self.mean = P.ReduceMean(False) - - def construct(self, logit, label): - one_hot_label = self.onehot(label, F.shape( - logit)[1], self.on_value, self.off_value) - loss = self.ce(logit, one_hot_label) - loss = self.mean(loss, 0) - return loss
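
For reference, the CrossEntropy loss deleted above builds a label-smoothed one-hot target (on-value 1 - smooth_factor, off-value smooth_factor / (num_classes - 1)) before passing it to SoftmaxCrossEntropyWithLogits. A minimal NumPy sketch of just that smoothing step; the function name and the example values (num_classes=10, smooth_factor=0.1, mirroring the removed test config) are illustrative and not part of the diff:

import numpy as np

def smoothed_one_hot(label, num_classes=10, smooth_factor=0.1):
    # On-value for the true class, off-value spread over the remaining classes,
    # matching how the deleted CrossEntropy computes on_value/off_value.
    on_value = 1.0 - smooth_factor
    off_value = smooth_factor / (num_classes - 1)
    target = np.full(num_classes, off_value, dtype=np.float32)
    target[label] = on_value
    return target

print(smoothed_one_hot(3))  # 0.9 at index 3, ~0.0111 elsewhere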
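
Similarly, the removed get_lr in lr_generator.py combines a linear warmup with a linearly damped cosine decay in its 'cosine' branch. A self-contained single-step sketch of that branch; the constants (the 2 * 0.47 frequency factor and the 1e-5 floor) are copied from the deleted code, while the helper name and the step counts in the usage line are hypothetical:

import math

def cosine_lr(step, lr_init, lr_max, warmup_steps, total_steps):
    # Linear warmup toward lr_max, then a cosine curve scaled by a linear decay term.
    if warmup_steps and step < warmup_steps:
        return lr_init + (lr_max - lr_init) / warmup_steps * (step + 1)
    decay_steps = total_steps - warmup_steps
    linear_decay = (total_steps - step) / decay_steps
    cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * step / decay_steps))
    return lr_max * (linear_decay * cosine_decay + 0.00001)

# e.g. no warmup and lr_max=0.005 (as in the removed config) over a hypothetical 390-step schedule
print(cosine_lr(0, 0.0, 0.005, 0, 390), cosine_lr(389, 0.0, 0.005, 0, 390))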