remove ge depend in cpu

2020-03-23 17:33:56 +08:00 · 2020-03-23 17:33:56 +08:00 · 73ba399364
parent 0f2ed0b134
commit 73ba399364
92 changed files with 1574 additions and 1237 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -42,11 +42,13 @@ else()
    include(${CMAKE_SOURCE_DIR}/cmake/dependency_graphengine.cmake)
 endif()

-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/external)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/framework)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain)
+if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/external)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/framework)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain)
+endif()

 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
 add_subdirectory(mindspore/ccsrc)
--- a/cmake/mind_expression.cmake
+++ b/cmake/mind_expression.cmake
@ -40,7 +40,7 @@ if (ENABLE_GE)
    include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include)
    include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include/external)
    include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include/external/graph)
-else()
+elseif(ENABLE_D OR ENABLE_TESTCASES)
    include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc)
    include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc/ops)
    include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc/external)
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@ -34,6 +34,8 @@ if(ENABLE_GPU)
            "device/gpu/*.cu"
            "kernel/gpu/*.cu"
            "kernel/akg/gpu/*.cc"
+            "kernel/akg/akgkernelbuild.cc"
+            "kernel/akg/akg_kernel_attrs_process.cc"
            )
    file(GLOB_RECURSE GPU_KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
            "kernel/gpu/*.cc"
@ -100,14 +102,14 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "debug/*.cc"
        "onnx/onnx_exporter.cc"
        "operator/*.cc"
-        "transform/*.cc"
        "session/kernel_graph.cc"
        "utils/node_utils.cc"
        "session/session_basic.cc"
        "session/session_factory.cc"
        "session/anf_runtime_algorithm.cc"
        "vm/*.cc"
-        "pynative/*.cc"
+        "pynative/base.cc"
+        "pynative/pynative_execute.cc"
        "pybind_api/*.cc"
        "device/common/*.cc"
        "kernel/kernel_query.cc"
@ -117,7 +119,6 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
        "device/kernel_runtime.cc"
        "device/kernel_runtime_manager.cc"
        "device/convert_tensor_utils.cc"
-        "pre_activate/ascend/*.cc"
        "pre_activate/common/*.cc"
        "pre_activate/pass/*.cc"
        "pre_activate/gpu/*.cc"
@ -168,6 +169,15 @@ if(ENABLE_DUMP_PROTO)
    add_compile_definitions(ENABLE_DUMP_PROTO)
 endif()

+if(ENABLE_GE)
+    file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+            "transform/*.cc"
+            "pynative/pynative_execute_ge.cc"
+            "pipeline/pipeline_ge.cc"
+            )
+    list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST})
+endif()
+
 if(ENABLE_D)
    include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
@ -188,6 +198,9 @@ if(ENABLE_D)
            "device/kernel_adjust.cc"
            "kernel/kernel_fusion.cc"
            "kernel/tbe/*.cc"
+            "pre_activate/ascend/*.cc"
+            "transform/*.cc"
+            "pipeline/pipeline_ge.cc"
            )
    list(APPEND MINDSPORE_SRC_LIST ${D_SRC_LIST})
    list(APPEND MINDSPORE_PROTO_AICPU_LIST ${PROTOSRCS})
@ -246,9 +259,11 @@ if (ENABLE_GE)
        target_link_libraries(mindspore graph ge_client)
    endif()
    target_link_libraries(mindspore tsdclient)
-else()
+elseif(ENABLE_D)
    add_compile_definitions(NO_GE_CLIENT)
    target_link_libraries(mindspore graph)
+else()
+    add_compile_definitions(NO_GE_CLIENT)
 endif()

 if(ENABLE_D)
@ -288,8 +303,6 @@ endif()
 set(PYTHON_MODULE_SOURCE
        pipeline/init.cc
        kernel/oplib/oplib.cc
-        kernel/akg/akgkernelbuild.cc
-        kernel/akg/akg_kernel_attrs_process.cc
    ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST}
    ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST})

@ -350,6 +363,7 @@ if(ENABLE_GPU)
    assign_source_group("Include" ${GROUP_INCLUDE})

    file(GLOB COMPILER_SRCS
+        "pre_activate/gpu/*.cc"
        ${TVM_DIR}/src/api/*.cc
        ${TVM_DIR}/src/arithmetic/*.cc
        ${TVM_DIR}/src/autotvm/*.cc
--- a/mindspore/ccsrc/debug/e2e_dump.cc
+++ b/mindspore/ccsrc/debug/e2e_dump.cc
@ -49,7 +49,7 @@ bool Dump::IsKernelNeedDump(const std::string& kernel_name) {
  return false;
 }

-bool Dump::ParseDumpConfig(const string& dump_config_file) {
+bool Dump::ParseDumpConfig(const std::string& dump_config_file) {
  std::ifstream jsonFile(dump_config_file);
  if (!jsonFile.is_open()) {
    MS_LOG(ERROR) << dump_config_file << " open failed.";
--- a/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc
+++ b/mindspore/ccsrc/device/ascend/kernel_build_ascend.cc
@ -94,7 +94,7 @@ static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *ke
  return ret;
 }

-static vector<int> CalCleanZerosSize(const CNodePtr &pre_node) {
+static std::vector<int> CalCleanZerosSize(const CNodePtr &pre_node) {
  MS_EXCEPTION_IF_NULL(pre_node);
  std::vector<int> clean_size_list;
  // clean output
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_manager.cc
@ -27,6 +27,7 @@
 #include "utils/log_adapter.h"
 #include "utils/context/ms_context.h"
 #include "common/utils.h"
+#include "utils/convert_utils.h"

 using std::vector;
 using Json = nlohmann::json;
--- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
+++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
@ -121,8 +121,8 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
    LaunchAddrCleanKernel(anf_node_ptr, &kernel_inputs);
  }

-  std::vector<TaskInfoPtr> task_info_ptrs =
-    kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
+  std::vector<TaskInfoPtr> task_info_ptrs = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod)
+                                              ->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
  task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end());
  return true;
 }
--- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.h
+++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.h
@ -24,7 +24,7 @@
 #include <vector>
 #include "device/kernel_runtime.h"
 #include "ir/anf.h"
-#include "kernel/kernel.h"
+#include "kernel/ascend_kernel_mod.h"
 #include "framework/ge_runtime/task_info.h"

 namespace mindspore {
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc
@ -21,7 +21,6 @@
 #include "kernel/gpu/gpu_kernel_factory.h"
 #include "operator/ops.h"
 #include "pybind11/stl.h"
-#include "transform/convert.h"
 #include "session/anf_runtime_algorithm.h"
 namespace mindspore {
 namespace device {
--- a/mindspore/ccsrc/device/gpu/kernel_info_setter.cc
+++ b/mindspore/ccsrc/device/gpu/kernel_info_setter.cc
@ -91,7 +91,7 @@ std::string SupportedTypeList(const CNodePtr& kernel_node) {
  return supported_type_lists;
 }

-bool SelectAkgKernel(const CNodePtr& kernel_node, const shared_ptr<KernelBuildInfo>& selected_kernel_info) {
+bool SelectAkgKernel(const CNodePtr& kernel_node, const std::shared_ptr<KernelBuildInfo>& selected_kernel_info) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(selected_kernel_info);
  std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
--- a/mindspore/ccsrc/device/kernel_adjust.cc
+++ b/mindspore/ccsrc/device/kernel_adjust.cc
@ -32,6 +32,7 @@
 #include "device/ascend/profiling/profiling_manager.h"
 #include "device/ascend/kernel_select_ascend.h"
 #include "device/kernel_info.h"
+#include "runtime/base.h"

 constexpr auto kLoopCountParamName = "loop_count";
 constexpr auto kIterLoopParamName = "iter_loop";
--- a/mindspore/ccsrc/ir/anf.cc
+++ b/mindspore/ccsrc/ir/anf.cc
@ -197,6 +197,23 @@ PrimitivePtr GetCNodePrimitive(const AnfNodePtr& node) {
  return nullptr;
 }

+std::string GetCNodeFuncName(const CNodePtr cnode) {
+  if (cnode->inputs().empty()) {
+    return "";
+  }
+
+  AnfNodePtr valuenode = cnode->input(0);
+  if (valuenode->isa<ValueNode>()) {
+    auto value = GetValueNode(valuenode);
+    // check whether the valuenode is primitive
+    if (value->isa<Primitive>()) {
+      return value->cast<PrimitivePtr>()->name();
+    }
+    return value->ToString();
+  }
+  return "";
+}
+
 bool IsPrimitive(const AnfNodePtr& node, const PrimitivePtr& value) {
  if (IsValueNode<Primitive>(node)) {
    PrimitivePtr fn_value = GetValueNode<PrimitivePtr>(node);
--- a/mindspore/ccsrc/ir/anf.h
+++ b/mindspore/ccsrc/ir/anf.h
@ -384,6 +384,8 @@ static S GetValue(const ValuePtr &value) {
  return v;
 }

+std::string GetCNodeFuncName(CNodePtr cnode);
+
 // used to check whether an AnfNode is a cnode with a kind of Primitive as first input
 bool IsPrimitiveCNode(const AnfNodePtr &node, const PrimitivePtr &value);

--- a/mindspore/ccsrc/ir/meta_tensor.cc
+++ b/mindspore/ccsrc/ir/meta_tensor.cc
@ -25,7 +25,6 @@
 #include "device/device_address.h"
 #include "pybind_api/api_register.h"
 #include "pybind_api/export_flags.h"
-#include "pynative/pynative_execute.h"
 #include "pipeline/static_analysis/abstract_value.h"

 namespace mindspore {
--- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h
+++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.h
@ -18,11 +18,11 @@
 #include <vector>
 #include <memory>
 #include <string>
-#include "kernel/kernel.h"
+#include "kernel/ascend_kernel_mod.h"
 #include "kernel/aicpu/aicpu_util.h"
 namespace mindspore {
 namespace kernel {
-class AicpuOpKernelMod : public KernelMod {
+class AicpuOpKernelMod : public AscendKernelMod {
 public:
  AicpuOpKernelMod();
  ~AicpuOpKernelMod() override;
--- a/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc
+++ b/mindspore/ccsrc/kernel/akg/akgkernelbuild.cc
@ -35,7 +35,6 @@
 #include "utils/convert_utils.h"
 #include "utils/any.h"
 #include "utils/utils.h"
-#include "transform/convert.h"
 #include "session/anf_runtime_algorithm.h"
 #include "kernel/akg/akg_kernel_attrs_process.h"

@ -240,8 +239,8 @@ bool AkgKernelBuild::CreateOutputDescJson(const AnfNodePtr &anf_node, nlohmann::
  return true;
 }

-void GetJson(const AnfNodePtr &anf_node, const vector<int> &dyn_input_sizes, const shared_ptr<OpAttr> &op_attr,
-             nlohmann::json *const attr_json, const ValuePtr &attr_value) {
+void GetJson(const AnfNodePtr &anf_node, const std::vector<int> &dyn_input_sizes,
+             const std::shared_ptr<OpAttr> &op_attr, nlohmann::json *const attr_json, const ValuePtr &attr_value) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(op_attr);
  MS_EXCEPTION_IF_NULL(attr_json);
--- a/mindspore/ccsrc/kernel/ascend_kernel_mod.h
+++ b/mindspore/ccsrc/kernel/ascend_kernel_mod.h
@ -0,0 +1,36 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
+#define MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
+
+#include <vector>
+#include <memory>
+#include "framework/ge_runtime/task_info.h"
+#include "kernel/kernel.h"
+
+using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
+namespace mindspore {
+namespace kernel {
+class AscendKernelMod : public KernelMod {
+ public:
+  virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
+                                           const std::vector<AddressPtr> &, uint32_t) = 0;
+};
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_KERNEL_ASCEND_KERNEL_MOD_H_
--- a/mindspore/ccsrc/kernel/common_utils.cc
+++ b/mindspore/ccsrc/kernel/common_utils.cc
@ -19,7 +19,6 @@
 #include <map>
 #include <iostream>
 #include <fstream>
-#include "runtime/rt.h"
 #include "nlohmann/json.hpp"
 #include "session/anf_runtime_algorithm.h"
 #include "common/utils.h"
@ -490,7 +489,7 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info) {
  if (!filewrite.is_open()) {
    return;
  }
-  filewrite << info << endl;
+  filewrite << info << std::endl;
  filewrite.close();
  if (nullptr == realpath(path.c_str(), real_path)) {
    MS_LOG(DEBUG) << "dir " << path << " does not exit.";
--- a/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_gpu_kernel.h
@ -226,12 +226,12 @@ class LstmGpuKernel : public GpuKernel {
  size_t reserved_size_;

  // input desc
-  unique_ptr<cudnnTensorDescriptor_t[]> x_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> x_desc_;
  cudnnTensorDescriptor_t hx_desc_;
  cudnnTensorDescriptor_t cx_desc_;
  cudnnFilterDescriptor_t w_desc_;
  cudnnDropoutDescriptor_t dropout_desc_;
-  unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
  cudnnTensorDescriptor_t hy_desc_;
  cudnnTensorDescriptor_t cy_desc_;
  cudnnRNNDescriptor_t rnn_desc_;
--- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_data_gpu_kernel.h
@ -258,8 +258,8 @@ class LstmGradDataGpuKernel : public GpuKernel {
  cudnnRNNDescriptor_t rnn_desc_;

  // input desc
-  unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
-  unique_ptr<cudnnTensorDescriptor_t[]> dy_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> dy_desc_;
  cudnnTensorDescriptor_t dhy_desc_;
  cudnnTensorDescriptor_t dcy_desc_;
  cudnnFilterDescriptor_t w_desc_;
@ -269,7 +269,7 @@ class LstmGradDataGpuKernel : public GpuKernel {
  cudnnDropoutDescriptor_t dropout_desc_;

  // output desc
-  unique_ptr<cudnnTensorDescriptor_t[]> dx_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> dx_desc_;
  cudnnTensorDescriptor_t dhx_desc_;
  cudnnTensorDescriptor_t dcx_desc_;

--- a/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/lstm_grad_weight_gpu_kernel.h
@ -214,9 +214,9 @@ class LstmGradWeightGpuKernel : public GpuKernel {
  cudnnDropoutDescriptor_t dropout_desc_;

  // input desc
-  unique_ptr<cudnnTensorDescriptor_t[]> x_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> x_desc_;
  cudnnTensorDescriptor_t hx_desc_;
-  unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
+  std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;

  // output desc
  cudnnFilterDescriptor_t dw_desc_;
--- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.h
+++ b/mindspore/ccsrc/kernel/hccl/hccl_kernel.h
@ -23,14 +23,14 @@
 #include <vector>
 #include <algorithm>
 #include <utility>
-#include "kernel/kernel.h"
+#include "kernel/ascend_kernel_mod.h"
 #include "kernel/hccl/hcom_util.h"
 #include "hccl/hcom.h"
 #include "common/utils.h"

 namespace mindspore {
 namespace kernel {
-class HcclKernel : public KernelMod {
+class HcclKernel : public AscendKernelMod {
 public:
  HcclKernel();
  ~HcclKernel() override;
--- a/mindspore/ccsrc/kernel/kernel.h
+++ b/mindspore/ccsrc/kernel/kernel.h
@ -25,7 +25,6 @@
 #include "ir/meta_tensor.h"
 #include "pipeline/static_analysis/dshape.h"
 #include "utils/log_adapter.h"
-#include "framework/ge_runtime/task_info.h"

 namespace mindspore {
 enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AUTO_DIFF_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL };
@ -111,7 +110,6 @@ struct Address {
  size_t size;
 };
 using AddressPtr = std::shared_ptr<Address>;
-using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;

 class KernelMod {
 public:
@ -120,10 +118,6 @@ class KernelMod {
  virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
  virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                      const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) = 0;
-  virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
-                                           const std::vector<AddressPtr> &, uint32_t) {
-    return {};
-  }
  virtual std::vector<size_t> GenParameters() { return {}; }

  virtual ~KernelMod() = default;
--- a/mindspore/ccsrc/kernel/mng/rt_kernel.h
+++ b/mindspore/ccsrc/kernel/mng/rt_kernel.h
@ -22,12 +22,12 @@
 #include <memory>
 #include <map>
 #include <string>
-#include "kernel/kernel.h"
+#include "kernel/ascend_kernel_mod.h"
 #include "kernel/task_stream.h"

 namespace mindspore {
 namespace kernel {
-class RtKernel : public KernelMod {
+class RtKernel : public AscendKernelMod {
 public:
  RtKernel();
  ~RtKernel() override;
--- a/mindspore/ccsrc/kernel/oplib/oplib.cc
+++ b/mindspore/ccsrc/kernel/oplib/oplib.cc
@ -19,7 +19,7 @@
 #include <unordered_map>
 #include <memory>
 #include "utils/log_adapter.h"
-#include "kernel/oplib/opinfo.h"
+#include "utils/overload.h"
 #include "utils/context/ms_context.h"

 namespace mindspore {
@ -50,7 +50,7 @@ constexpr auto kNeedCompile = "need_compile";
 constexpr auto kShape = "shape";
 std::vector<std::shared_ptr<OpInfo>> OpLib::op_info_;

-string ImplTypeToStr(OpImplyType impl_type) {
+std::string ImplTypeToStr(OpImplyType impl_type) {
  switch (impl_type) {
    case kTBE:
      return kTbe;
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
@ -48,7 +48,7 @@ class TbeKernelBuild {
 private:
  TbeKernelBuild() = default;
  ~TbeKernelBuild() = default;
-  static bool GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str,
+  static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str,
                                     size_t *index);
  static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
                                   std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
@ -56,12 +56,13 @@ class TbeKernelBuild {
  static bool GenFusionComputeInputeJson(const mindspore::CNodePtr &cnode,
                                         std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
                                         std::vector<nlohmann::json> *input_desc_list, size_t *index);
-  static void GenDescJson(const shared_ptr<mindspore::AnfNode> &anf_node, size_t out_idx, nlohmann::json *output_desc);
-  static void GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index, size_t output_index,
-                                  nlohmann::json *output_desc);
+  static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t out_idx,
+                          nlohmann::json *output_desc);
+  static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
+                                  size_t output_index, nlohmann::json *output_desc);
  static size_t GetIOSizeImpl(const nlohmann::json &desc);
-  static bool GetInputLayers(const vector<mindspore::AnfNodePtr> &input_nodes,
-                             const vector<mindspore::AnfNodePtr> &compute_nodes,
+  static bool GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
+                             const std::vector<mindspore::AnfNodePtr> &compute_nodes,
                             std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers);
  static bool IsDynamicInput(const CNodePtr &cnode);
  static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
@ -82,15 +83,17 @@ class TbeKernelJsonCreator {
  bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
                      nlohmann::json *attrs_json);
  void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
-  bool GenInputDescJson(const shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
-                        const shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
-                        vector<nlohmann::json> *input_list);
-  bool GenOutputDescJson(const shared_ptr<AnfNode> &anf_node, const vector<std::shared_ptr<OpIOInfo>> &outputs_ptr,
-                         nlohmann::json *outputs_json);
-  bool GenInputList(const shared_ptr<AnfNode> &anf_node, size_t input_tensor_num, const shared_ptr<OpIOInfo> &input_ptr,
-                    size_t *real_input_index, string *op_input_name, vector<nlohmann::json> *input_list);
-  void GenOutputList(const shared_ptr<AnfNode> &anf_node, const size_t &output_obj_num,
-                     const shared_ptr<OpIOInfo> &output_ptr, size_t *output_idx, vector<nlohmann::json> *output_list);
+  bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
+                        const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
+                        std::vector<nlohmann::json> *input_list);
+  bool GenOutputDescJson(const std::shared_ptr<AnfNode> &anf_node,
+                         const std::vector<std::shared_ptr<OpIOInfo>> &outputs_ptr, nlohmann::json *outputs_json);
+  bool GenInputList(const std::shared_ptr<AnfNode> &anf_node, size_t input_tensor_num,
+                    const std::shared_ptr<OpIOInfo> &input_ptr, size_t *real_input_index, string *op_input_name,
+                    std::vector<nlohmann::json> *input_list);
+  void GenOutputList(const std::shared_ptr<AnfNode> &anf_node, const size_t &output_obj_num,
+                     const std::shared_ptr<OpIOInfo> &output_ptr, size_t *output_idx,
+                     std::vector<nlohmann::json> *output_list);
  kCreaterType creater_type_;
  std::string json_name_;
  std::string json_info_;
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.h
@ -21,12 +21,12 @@
 #include <string>
 #include <vector>
 #include <utility>
-#include "kernel/kernel.h"
+#include "kernel/ascend_kernel_mod.h"
 #include "kernel/tbe/tbe_utils.h"

 namespace mindspore {
 namespace kernel {
-class TbeKernelMod : public KernelMod {
+class TbeKernelMod : public AscendKernelMod {
 public:
  explicit TbeKernelMod(KernelPackPtr kernel_pack) : kernel_pack_(std::move(kernel_pack)) {}
  ~TbeKernelMod() override = default;
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_parallel_build.h
@ -55,8 +55,9 @@ class ParallelBuildManager {
  bool WaitOne(int *task_id, char **task_result) const;
  bool IsAllTaskFinish() const;
  std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
-  KernelModPtr GenKernelMod(const string &json_name, const string &processor, const vector<size_t> &input_size_list,
-                            const vector<size_t> &output_size_list, const KernelPackPtr &kernel_pack) const;
+  KernelModPtr GenKernelMod(const string &json_name, const string &processor,
+                            const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
+                            const KernelPackPtr &kernel_pack) const;

 private:
  PyObject *tbe_parallel_compiler_;
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc
@ -168,7 +168,7 @@ bool ParseDynamicFormatJson(const std::string &jsonStr, std::vector<std::shared_
  return true;
 }

-std::string OpSelectFormat(const shared_ptr<AnfNode> &anf_node) {
+std::string OpSelectFormat(const std::shared_ptr<AnfNode> &anf_node) {
  nlohmann::json kernel_json;
  std::string res_json_str;
  TbeKernelJsonCreator creator(OP_SELECT_FORMAT);
@ -182,7 +182,7 @@ std::string OpSelectFormat(const shared_ptr<AnfNode> &anf_node) {
  return res_json_str;
 }

-void SetTidyInputsInfo(const shared_ptr<AnfNode> &anf_node,
+void SetTidyInputsInfo(const std::shared_ptr<AnfNode> &anf_node,
                       const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder,
                       const std::vector<std::shared_ptr<OpIOInfo>> &inputs) {
  std::vector<TypeId> inputs_type;
@ -231,7 +231,7 @@ void SetTidyInputsInfo(const shared_ptr<AnfNode> &anf_node,
  builder->SetInputsFormat(inputs_format);
 }

-void SetTidyOutputsInfo(const shared_ptr<AnfNode> &anf_node,
+void SetTidyOutputsInfo(const std::shared_ptr<AnfNode> &anf_node,
                        const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder,
                        const std::vector<std::shared_ptr<OpIOInfo>> &outputs) {
  std::vector<TypeId> outputs_type;
@ -268,7 +268,8 @@ void SetTidyOutputsInfo(const shared_ptr<AnfNode> &anf_node,
  builder->SetOutputsFormat(outputs_format);
 }

-void GenTidyKernelBuildInfo(const shared_ptr<AnfNode> &anf_node, const std::vector<std::shared_ptr<OpIOInfo>> &inputs,
+void GenTidyKernelBuildInfo(const std::shared_ptr<AnfNode> &anf_node,
+                            const std::vector<std::shared_ptr<OpIOInfo>> &inputs,
                            const std::vector<std::shared_ptr<OpIOInfo>> &outputs) {
  auto builder_tmp = std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
  builder_tmp->SetKernelType(TBE_KERNEL);
--- a/mindspore/ccsrc/kernel/tbe/tbe_utils.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_utils.cc
@ -26,6 +26,7 @@
 #include <iostream>
 #include <fstream>

+#include "runtime/kernel.h"
 #include "kernel/oplib/oplib.h"
 #include "utils/utils.h"
 #include "session/anf_runtime_algorithm.h"
--- a/mindspore/ccsrc/pipeline/base.h
+++ b/mindspore/ccsrc/pipeline/base.h
@ -0,0 +1,64 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PIPELINE_BASE_H_
+#define MINDSPORE_CCSRC_PIPELINE_BASE_H_
+
+#include <mutex>
+#include <memory>
+#include <string>
+#include <sstream>
+
+#include "ir/anf.h"
+#include "pipeline/resource.h"
+#include "utils/context/ms_context.h"
+
+namespace mindspore {
+namespace pipeline {
+
+struct ExecutorInfo {
+  FuncGraphPtr func_graph;
+  ResourcePtr resource;
+  std::size_t arg_list_size;
+};
+
+using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>;
+
+inline std::string GetPhasePrefix(const std::string& phase) {
+  auto pos = phase.find('.');
+  if (pos == std::string::npos) {
+    MS_LOG(EXCEPTION) << "phase has no . for prefix" << phase;
+  }
+  return phase.substr(0, pos);
+}
+
+inline std::string GetFilePathName(const std::string& file_name) {
+  std::ostringstream oss;
+  auto ms_context = MsContext::GetInstance();
+  if (ms_context == nullptr) {
+    MS_LOG(EXCEPTION) << "ms_context is nullptr";
+  }
+  auto save_graphs_path = ms_context->save_graphs_path();
+  if (save_graphs_path.empty()) {
+    save_graphs_path = ".";
+  }
+  oss << save_graphs_path << "/" << file_name;
+  return oss.str();
+}
+}  // namespace pipeline
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PIPELINE_BASE_H_
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@ -73,7 +73,7 @@ PYBIND11_MODULE(_c_expression, m) {
         "Get CNode Strategy Dictionary.")
    .def("get_allreduce_fusion", &ExecutorPy::GetAllreduceFusion, py::arg("phase") = py::str("train"),
         "Get Allreduce Fusion Dictionary.")
-    .def("build_data_graph", &ExecutorPy::BuildDFGraph, py::arg("build_params"), py::arg("phase") = py::str("train"),
+    .def("build_data_graph", &ExecutorPy::BuildGraph, py::arg("build_params"), py::arg("phase") = py::str("train"),
         py::arg("broadcast_params") = py::dict(), "Build data graph.")
    .def("has_compiled", &ExecutorPy::HasCompiled, py::arg("phase") = py::str(""), "get if cell compiled.")
    .def("run_init_graph", &ExecutorPy::RunInitGraph, "Run init Graph.");
@ -86,19 +86,17 @@ PYBIND11_MODULE(_c_expression, m) {

  (void)m.def("generate_key", &mindspore::pipeline::GenerateKey, "Generate the function graph key.");
  (void)m.def("real_run_op", &mindspore::pynative::RunOp, "Run op pynatively.");
-  (void)m.def("initialize_distribute", &mindspore::pipeline::InitDistribute, "Initialize for Distribute.")
-    .def("init_ge", &mindspore::pipeline::InitGe, "Init GE");
  (void)m.def("reset_op_id", &mindspore::pipeline::ResetOpId, "Reset Operator Id");
  (void)m.def("init_hccl", &mindspore::pipeline::InitHccl, "Init Hccl");
-  (void)m.def("finalize_ge", &mindspore::pipeline::FinalizeGe, "Finalize Ge");
  (void)m.def("finalize_hccl", &mindspore::pipeline::FinalizeHccl, "Finalize Hccl");
-  (void)m.def("set_ge_option", &mindspore::pipeline::SetGeOption, "API for set ge option.");
  (void)m.def("verify_inputs_signature", &mindspore::pipeline::VerifyInputSignature, "Verify input signature.");
  (void)m.def("init_exec_dataset", &mindspore::pipeline::InitExecDataset, py::arg("queue_name"), py::arg("size"),
              py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"),
              py::arg("phase") = py::str("dataset"), "Init and exec dataset.");
  (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode.");
-  (void)m.def("export_graph", &mindspore::pipeline::ExportDFGraph, "Export Graph.");
+  (void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE");
+
+  (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph.");

  (void)py::class_<mindspore::MsContext, std::shared_ptr<mindspore::MsContext>>(m, "MSContext")
    .def_static("get_instance", &mindspore::MsContext::GetInstance, "Get ms context instance.")
--- a/mindspore/ccsrc/pipeline/parse/python_adapter.cc
+++ b/mindspore/ccsrc/pipeline/parse/python_adapter.cc
@ -27,6 +27,7 @@ static std::shared_ptr<py::scoped_interpreter> scoped_ = nullptr;
 //  true: start process from python, false: start process from c++
 static bool python_env_ = false;
 static bool use_signature_in_resolve_ = true;
+void ResetPythonScope() { scoped_ = nullptr; }
 void set_use_signature_in_resolve(bool use_signature) noexcept { use_signature_in_resolve_ = use_signature; }
 bool UseSignatureInResolve() { return use_signature_in_resolve_; }
 void set_python_env_flag(bool python_env) noexcept { python_env_ = python_env; }
--- a/mindspore/ccsrc/pipeline/parse/python_adapter.h
+++ b/mindspore/ccsrc/pipeline/parse/python_adapter.h
@ -55,6 +55,7 @@ void set_use_signature_in_resolve(bool use_signature) noexcept;
 bool UseSignatureInResolve();

 std::shared_ptr<py::scoped_interpreter> set_python_scoped();
+void ResetPythonScope();
 bool IsPythonEnv();
 void SetPythonPath(const std::string& path);
 void set_python_env_flag(bool python_env) noexcept;
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@ -27,11 +27,6 @@
 #include "pipeline/pass.h"
 #include "pipeline/parse/data_converter.h"
 #include "optimizer/ad/dfunctor.h"
-#include "ir/meta_tensor.h"
-#include "transform/convert.h"
-#include "transform/df_graph_manager.h"
-#include "transform/graph_builder.h"
-#include "transform/graph_runner.h"
 #include "debug/anf_ir_dump.h"
 #include "debug/anf_ir_utils.h"
 #include "utils/config_manager.h"
@ -44,6 +39,12 @@
 #include "device/kernel_runtime_manager.h"
 #include "debug/trace.h"

+#if (ENABLE_GE || ENABLE_D)
+#include "pipeline/pipeline_ge.h"
+#include "transform/convert.h"
+#include "transform/df_graph_manager.h"
+#endif
+
 namespace mindspore {
 // namespace to support intermediate representation definition
 namespace pipeline {
@ -54,12 +55,6 @@ using mindspore::abstract::AbstractTensor;
 using mindspore::abstract::AbstractTensorPtr;
 using mindspore::abstract::AbstractTuple;
 using mindspore::abstract::AbstractTuplePtr;
-using mindspore::transform::DfGraphConvertor;
-using mindspore::transform::DfGraphManager;
-using mindspore::transform::GeTensorPtr;
-using mindspore::transform::MeTensorPtr;
-using mindspore::transform::Status;
-using mindspore::transform::TransformUtil;

 const char IR_TYPE_ANF[] = "anf_ir";
 const char IR_TYPE_ONNX[] = "onnx_ir";
@ -85,65 +80,8 @@ std::string GetBaseNameForIR(int stage_idx, const std::string& action_name) {
  oss << save_graphs_path << "/" << stage_idx << "_" << action_name;
  return oss.str();
 }
-
-std::string GetFilePathName(const std::string& file_name) {
-  std::ostringstream oss;
-  auto ms_context = MsContext::GetInstance();
-  if (ms_context == nullptr) {
-    MS_LOG(EXCEPTION) << "ms_context is nullptr";
-  }
-  auto save_graphs_path = ms_context->save_graphs_path();
-  if (save_graphs_path.empty()) {
-    save_graphs_path = ".";
-  }
-  oss << save_graphs_path << "/" << file_name;
-  return oss.str();
-}
 }  // namespace

-// We will not execute graph when output is constant or just input itself.
-static bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr& output, const py::tuple& args,
-                                              const std::shared_ptr<py::object>& ret_val) {
-  if (output->isa<ValueNode>()) {
-    MS_LOG(INFO) << "Graph's output is a constant. No need to execute.";
-    ValuePtr value = GetValueNode(output);
-    *ret_val = ValuePtrToPyData(value);
-    return true;
-  }
-
-  // Adapter will transform values in __init__() and construct() to parameters, this could cause
-  // inputs (a.k.a args in current function) size less than parameters'.
-  if (output->isa<Parameter>()) {
-    MS_LOG(INFO) << "Graph's output is a parameter. If all params are inputs, no need to execute.";
-    if (args.empty()) {
-      MS_LOG(EXCEPTION) << "Inputs size is 0, let graph to be executed.";
-    }
-    // Find the right parameter as ret_val.
-    auto func_graph = output->func_graph();
-    MS_EXCEPTION_IF_NULL(func_graph);
-    auto params = func_graph->parameters();
-    if (params.empty()) {
-      MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0";
-    }
-    if (args.size() != params.size()) {
-      MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size()
-                        << ", let graph to be executed.";
-    }
-
-    auto it = std::find(params.begin(), params.end(), output);
-    if (it == params.end()) {
-      MS_EXCEPTION(UnknownError) << "When graph output is Parameter,  it should be found in graph parameters";
-    }
-    size_t index = it - params.cbegin();
-    if (index >= args.size()) {
-      MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than args size " << args.size() << ".";
-    }
-    *ret_val = args[index];
-    return true;
-  }
-  return false;
-}
-
 py::tuple GenerateKey(const std::string& name, const std::unordered_map<std::string, py::object>& defaults) {
  MS_LOG(DEBUG) << "GenerateKey args size:" << defaults.size();
  abstract::AbstractBasePtrList args_spec;
@ -207,11 +145,7 @@ py::bool_ VerifyInputSignature(const py::list input_signature, const py::tuple i
  return true;
 }

-ExecutorPy::ExecutorPy() {
-  // because Ge only support one Session exist at the same time ,so we delete the old one
-  DfGraphManager::GetInstance().DeleteGraphRunner();
-  DfGraphManager::GetInstance().DeleteGeSession();
-}
+ExecutorPy::ExecutorPy() {}

 ResourcePtr ExecutorPy::GetResource(const std::string& phase) {
  MS_LOG(DEBUG) << "phase size:" << info_.size();
@ -221,14 +155,6 @@ ResourcePtr ExecutorPy::GetResource(const std::string& phase) {
  return info_[phase]->resource;
 }

-std::string GetPhasePrefix(const std::string& phase) {
-  auto pos = phase.find('.');
-  if (pos == std::string::npos) {
-    MS_LOG(EXCEPTION) << "phase has no . for prefix" << phase;
-  }
-  return phase.substr(0, pos);
-}
-
 FuncGraphPtr ExecutorPy::GetFuncGraph(const std::string& phase) {
  if (info_.count(phase) == 0) {
    MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase);
@ -323,11 +249,15 @@ void ExecutorPy::DelNetRes(const std::string& id) {
      }
    }

+    MS_LOG(INFO) << "Delete flag:" << flag;
+#ifdef ENABLE_GE
    if (flag && info_.size() == 0) {
-      DfGraphManager::GetInstance().DeleteGraphRunner();
-      DfGraphManager::GetInstance().EraseAnfGraph();
-      DfGraphManager::GetInstance().DeleteGeSession();
+      // because Ge only support one Session exist at the same time ,so we delete the old one
+      transform::DfGraphManager::GetInstance().DeleteGraphRunner();
+      transform::DfGraphManager::GetInstance().EraseAnfGraph();
+      transform::DfGraphManager::GetInstance().DeleteGeSession();
    }
+#endif
  }
 }

@ -405,7 +335,8 @@ bool ExecutorPy::CompileInner(const py::object& obj, const py::tuple& args, cons

  use_vm = ChangeExportGeirUseVmFlag(use_vm, phase_s);

-  if (use_vm) {
+  std::string backend = MsContext::GetInstance()->backend_policy();
+  if (use_vm && backend != "ge") {
    // Create backend and session
    resource->results()[kBackend] = compile::CreateBackend();
    p_actions = VmPipeline();
@ -497,30 +428,6 @@ bool ExecutorPy::Compile(const py::object& obj, const py::tuple& args, const py:
  return ret_value;
 }

-void SetGeOption(const std::map<std::string, std::string>& options) {
-  ConfigManager::GetInstance().set_ge_initialize_options(options);
-}
-
-bool InitDistribute(const std::map<std::string, std::string>& options) {
-  ConfigManager::GetInstance().set_parallel_strategy(ParallelStrategy::DISTRIBUTION);
-  MS_LOG(INFO) << "ME run in DISTRIBUTION strategy mode";
-
-  SetGeOption(options);
-#ifdef ENABLE_GE
-  auto ge_options = ConfigManager::GetInstance().ge_initialize_options();
-  {
-    // Release GIL before calling into (potentially long-running) C++ code
-    py::gil_scoped_release release;
-    if (ge::GEInitialize(ge_options) != ge::GRAPH_SUCCESS) {
-      MS_LOG(ERROR) << "Initialize GE failed!";
-      return false;
-    }
-  }
-#endif
-  MS_LOG(DEBUG) << "Initialize Ge success";
-  return true;
-}
-
 #ifdef ENABLE_LOAD_ANF_IR
 // get MindSpore Intermediate Representation File
 std::string GetMsIrFile(void) {
@ -704,9 +611,25 @@ py::object ExecutorPy::Run(const py::tuple& args, const py::object& phase) {
  }
  auto phase_s = py::cast<std::string>(phase);
  std::string backend = MsContext::GetInstance()->backend_policy();
+#ifdef ENABLE_GE
  if (backend == "ge") {
-    return ExecDFGraph(args, phase_s);
+    return ExecDFGraph(info_, args, phase_s);
  }
+#else
+  MS_LOG(WARNING) << "In ut test " << size << phase_s;
+  if (backend == "ge") {
+    std::shared_ptr<py::object> ret_val = std::make_shared<py::object>();
+    if (info_.count(phase_s) != 0 && info_[phase_s]->func_graph != nullptr) {
+      if (IsGraphOutputValueNodeOrParameter(info_[phase_s]->func_graph->output(), args, ret_val)) {
+        return *ret_val;
+      }
+    }
+    if (args.size() > 0) {
+      return args[0];
+    }
+    return args;
+  }
+#endif
  std::size_t full_arg_size = ArgListSize(phase_s);
  if (size > full_arg_size) {
    MS_LOG(WARNING) << "The arg num : size = " << size << ". full_arg_size = " << full_arg_size;
@ -719,435 +642,25 @@ py::object ExecutorPy::Run(const py::tuple& args, const py::object& phase) {
    MS_LOG(EXCEPTION) << "Can't find run graph func for " << phase_s;
  }

-  MS_LOG(DEBUG) << "eval run";
+  MS_LOG(DEBUG) << "eval run" << backend;
  BaseRef value = (*run)(arg_list);
  MS_LOG(DEBUG) << "run end";
  return BaseRefToPyData(value);
 }

-py::object ExtractGeneralCnodeRet(const AbstractBasePtr& cnode_data, const py::tuple& data, size_t* count) {
-  MS_EXCEPTION_IF_NULL(cnode_data);
-  if (*count >= data.size()) {
-    MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
-                      << " less than the number of elements required. ";
-  }
-
-  if (cnode_data->isa<AbstractTensor>()) {
-    BaseShapePtr shape = cnode_data->BuildShape();
-    auto shape_act = shape->cast<abstract::ShapePtr>()->shape();
-    Tensor tensor_exp = py::cast<Tensor>(data[*count]);
-    if (shape_act != tensor_exp.shape()) {
-      MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as "
-                           "the shape of the tensor derived from ME.";
-    }
-    return data[(*count)++];
-  }
-
-  if (!cnode_data->isa<AbstractTuple>()) {
-    MS_LOG(EXCEPTION) << "The output of operator in the final anf graph could "
-                      << "only be a tensor or a tuple of tensor, but got " << cnode_data->BuildValue()->ToString()
-                      << ".";
-  }
-  auto data_tp = cnode_data->cast<AbstractTuplePtr>();
-  auto elements = data_tp->elements();
-  size_t size = data_tp->size();
-  py::tuple tp = py::tuple(size);
-  for (size_t i = 0; i < size; i++) {
-    tp[i] = ExtractGeneralCnodeRet(elements[i], data, count);
-  }
-  return std::move(tp);
-}
-
-py::object StructureOutput(const AnfNodePtr& output_node, const py::tuple& data, size_t* count) {
-  MS_EXCEPTION_IF_NULL(output_node);
-
-  if (output_node->isa<ValueNode>()) {
-    return ValuePtrToPyData(GetValueNode(output_node));
-  }
-
-  if (*count >= data.size()) {
-    MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
-                      << " less than the number of elements required. ";
-  }
-  if (output_node->isa<Parameter>()) {
-    return data[(*count)++];
-  }
-
-  auto output_c = output_node->cast<CNodePtr>();
-  if (output_c == nullptr) {
-    MS_LOG(EXCEPTION) << "The final anf graph could only have constant, parameter, and operator, but got "
-                      << output_node->ToString();
-  }
-
-  if (output_c->IsApply(prim::kPrimMakeTuple)) {
-    auto input_list = output_c->inputs();
-    size_t size = input_list.size();
-    py::tuple tp = py::tuple(size - 1);
-    for (size_t i = 1; i < size; i++) {
-      tp[i - 1] = StructureOutput(input_list[i], data, count);
-    }
-    return std::move(tp);
-  }
-  if (output_c->IsApply(prim::kPrimDepend)) {
-    return StructureOutput(output_c->input(1), data, count);
-  }
-
-  return ExtractGeneralCnodeRet(output_c->abstract(), data, count);
-}
-
-std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr& graph, const std::vector<MeTensorPtr>& inputs,
-                                        const std::string& phase) {
-  std::vector<GeTensorPtr> ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW);
-  if (ge_tensors.size() != inputs.size()) {
-    MS_LOG(ERROR) << "args convert to ge tensor error";
-    return nullptr;
-  }
-
-  std::vector<GeTensorPtr> ge_outputs;
-  transform::RunOptions run_options;
-
-  run_options.name = phase;
-
-  auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
-
-  if (graph_runner == nullptr) {
-    MS_LOG(ERROR) << "Can not found GraphRunner";
-    return nullptr;
-  }
-
-  {
-    // Release GIL before calling into (potentially long-running) C++ code
-    py::gil_scoped_release release;
-    MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size();
-    Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
-    MS_LOG(DEBUG) << "Run graph finish, outputs size is: " << ge_outputs.size();
-    if (ret != Status::SUCCESS) {
-      MS_LOG(ERROR) << "Exec graph failed";
-      return nullptr;
-    }
-  }
-
-  std::vector<MeTensorPtr> me_outputs = TransformUtil::ConvertGeTensors(ge_outputs);
-  if (me_outputs.size() != ge_outputs.size()) {
-    MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed";
-  }
-
-  py::tuple outputs(me_outputs.size());
-  for (std::size_t i = 0; i < outputs.size(); i++) {
-    outputs[i] = *me_outputs[i];
-  }
-
-  std::shared_ptr<py::object> ret = nullptr;
-
-#ifdef ENABLE_GE
-  AnfNodePtr output_node = graph->get_return()->input(1);
-  MS_EXCEPTION_IF_NULL(output_node);
-  size_t count = 0;
-  py::object oj = StructureOutput(output_node, outputs, &count);
-  ret = std::make_shared<py::object>(oj);
+FuncGraphPtr ExecutorPy::BuildGraph(const py::dict& init_params, const std::string& phase,
+                                    const py::object& broadcast_params) {
+#if (ENABLE_GE || ENABLE_D)
+  return BuildDFGraph(info_, init_params, phase, broadcast_params);
 #else
-  if (outputs.size() == 1) {
-    ret = std::make_shared<py::object>(outputs[0]);
-  } else {
-    ret = std::make_shared<py::object>(outputs);
-  }
+  return nullptr;
 #endif
-
-  return ret;
-}
-
-void DoExecNonInputGraph(const std::string& phase) {
-  std::vector<GeTensorPtr> ge_tensors;
-  std::vector<GeTensorPtr> ge_outputs;
-  transform::RunOptions run_options;
-  run_options.name = phase;
-  auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
-
-  if (graph_runner == nullptr) {
-    MS_LOG(ERROR) << "Can not found GraphRunner";
-    return;
-  }
-  {
-    // Release GIL before calling into (potentially long-running) C++ code
-    py::gil_scoped_release release;
-    Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
-    if (ret != Status::SUCCESS) {
-      MS_LOG(ERROR) << "Exec graph:" << run_options.name << " failed";
-      return;
-    }
-  }
-}
-
-void ExecutorPy::ProcessGeArg(const py::tuple& args, const std::string& phase, std::vector<tensor::TensorPtr>* inputs) {
-  // check the arg and use the ExecutorPy args
-  std::size_t size = args.size();
-  if (size != ArgListSize(phase)) {
-    MS_LOG(EXCEPTION) << "The real arg num : size = " << size << ". graph_arg_size = " << ArgListSize(phase);
-  }
-
-  // process the first args of tensor
-  // only in Dataset Feed Mode, fp_bp graph need input tensors
-  if (ConfigManager::GetInstance().dataset_mode() == DS_FEED_MODE) {
-    for (std::size_t i = 0; i < size; i++) {
-      ValuePtr converted = nullptr;
-      bool succ = parse::ConvertData(args[i], &converted);
-      if (!succ) {
-        MS_LOG(EXCEPTION) << "args convert error";
-      }
-      if (converted->isa<tensor::Tensor>()) {
-        (*inputs).push_back(converted->cast<tensor::TensorPtr>());
-      } else {
-        MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor";
-      }
-    }
-  }
-}
-
-py::object ExecutorPy::ExecDFGraph(const py::tuple& args, const std::string& phase) {
-  std::string phase_prefix = GetPhasePrefix(phase);
-
-  if (phase_prefix == "save") {
-    DoExecNonInputGraph(phase);
-    ConfigManager::GetInstance().ResetConfig();
-    return py::none();
-  }
-
-  if (info_.count(phase) == 0) {
-    MS_LOG(EXCEPTION) << "has no phase:" << phase;
-  }
-
-#if (!defined ENABLE_GE) || (defined ENABLE_INFER)
-  // Now don't use the graph because the exec ge function don't take effect
-  MS_EXCEPTION_IF_NULL(info_[phase]->func_graph);
-  if (ENABLE_TRAIN != info_[phase]->func_graph->flags()["training"]) {
-    MS_LOG(ERROR) << "Graph training mode mismatch mode of libraries";
-    ConfigManager::GetInstance().ResetConfig();
-    return py::none();
-  }
-#endif
-
-  std::shared_ptr<py::object> ret_val = std::make_shared<py::object>();
-  if (IsGraphOutputValueNodeOrParameter(info_[phase]->func_graph->output(), args, ret_val)) {
-    ConfigManager::GetInstance().ResetConfig();
-    return *ret_val;
-  }
-
-  std::vector<tensor::TensorPtr> inputs;
-  ProcessGeArg(args, phase, &inputs);
-
-  std::shared_ptr<py::object> ret = DoExecGraph(GetFuncGraph(phase), inputs, phase);
-  ConfigManager::GetInstance().ResetConfig();
-  if (ret != nullptr) {
-    return *ret;
-  } else {
-    MS_LOG(EXCEPTION) << "exec graph failed";
-  }
 }

 void ExecutorPy::RunInitGraph(const py::dict& init_params, const std::string& phase) {
-  MS_LOG(DEBUG) << "ExecInitGraph start.";
-  TensorOrderMap inputs_with_name{};
-  ConvertObjectToTensors(init_params, &inputs_with_name);
-  std::vector<tensor::TensorPtr> inputs;
-  (void)std::transform(inputs_with_name.begin(), inputs_with_name.end(), std::back_inserter(inputs),
-                       [](const std::pair<std::string, tensor::TensorPtr>& item) { return item.second; });
-
-  std::vector<GeTensorPtr> ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW);
-  if (ge_tensors.size() != inputs.size()) {
-    MS_LOG(ERROR) << "Args convert to ge tensor error.";
-    return;
-  }
-  MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size() << ".";
-
-  std::vector<GeTensorPtr> ge_outputs;
-  transform::RunOptions run_options;
-
-  run_options.name = phase;
-  if (DfGraphManager::GetInstance().GetGraphByName(phase) == nullptr) {
-    MS_LOG(WARNING) << "Can not find " << phase << " sub graph, don't need data init subgraph in INFER mode.";
-    return;
-  }
-  auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
-  if (graph_runner == nullptr) {
-    MS_LOG(EXCEPTION) << "Can not found GraphRunner.";
-  }
-  {
-    // Release GIL before calling into (potentially long-running) C++ code
-    py::gil_scoped_release release;
-    Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
-    if (ret != Status::SUCCESS) {
-      MS_LOG(EXCEPTION) << "Exec " << phase << " graph failed.";
-    }
-
-    MS_LOG(INFO) << "Exec " << phase << " graph success.";
-
-    if ((ConfigManager::GetInstance().parallel_strategy() == ParallelStrategy::DISTRIBUTION) &&
-        (DfGraphManager::GetInstance().GetGraphByName(BROADCAST_GRAPH_NAME) != nullptr)) {
-      run_options.name = BROADCAST_GRAPH_NAME;
-      ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
-      if (ret != Status::SUCCESS) {
-        MS_LOG(EXCEPTION) << "Exec BROADCAST_GRAPH_NAME failed.";
-      }
-      MS_LOG(INFO) << "Exec broadcast graph success.";
-    }
-  }
-}
-
-Status CreateSessionAndGraphRunner(bool is_training = true) {
-  std::shared_ptr<ge::Session> sess = DfGraphManager::GetInstance().GetGeSession();
-  if (sess == nullptr) {
-    transform::SessionOptions options;
-    if (is_training) {
-      options["ge.trainFlag"] = "1";
-      options["ge.streamNum"] = "100";
-      options["ge.enabledLocalFmkop"] = "1";
-      options["ge.hcomParallel"] = "1";
-    } else {
-      options["ge.trainFlag"] = "0";
-    }
-
-    options["ge.enablePrintOpPass"] = "0";
-    sess = transform::GraphRunner::NewSession(options);
-    if (sess == nullptr) {
-      MS_LOG(ERROR) << "Init data graph failed, because of create Ge session failed";
-      return Status::FAILED;
-    } else {
-      DfGraphManager::GetInstance().SetGeSession(sess);
-    }
-  }
-
-  transform::GraphRunnerOptions options;
-  options.sess_ptr = sess;
-  auto graph_runner = std::make_shared<transform::GraphRunner>(options);
-  if (graph_runner == nullptr) {
-    MS_LOG(ERROR) << "Create new graph runner failed";
-    return Status::FAILED;
-  } else {
-    DfGraphManager::GetInstance().SetGraphRunner(graph_runner);
-  }
-
-  return Status::SUCCESS;
-}
-
-void ExecutorPy::ConvertObjectToTensors(const py::dict& dict, TensorOrderMap* const tensors) {
-  for (auto item : dict) {
-    if ((!py::isinstance<py::str>(item.first))) {
-      MS_LOG(WARNING) << "Type of key of py_dict is not string, ignore it.";
-      continue;
-    }
-    std::shared_ptr<Tensor> tensor;
-    std::string name = py::cast<std::string>(item.first);
-    if (py::isinstance<py::float_>(item.second.attr("default_input"))) {
-      // convert float to tensor with shape([1])
-      tensor = std::make_shared<Tensor>(kNumberTypeFloat32, std::vector<int>({1}));
-      *(static_cast<float*>(tensor->data_c(true))) = py::cast<float>(item.second.attr("default_input"));
-    } else if (py::isinstance<py::int_>(item.second.attr("default_input"))) {
-      // convert int to tensor with shape([1])
-      tensor = std::make_shared<Tensor>(kNumberTypeInt32, std::vector<int>({1}));
-      *(static_cast<float*>(tensor->data_c(true))) = py::cast<float>(item.second.attr("default_input"));
-    } else if (py::hasattr(item.second.attr("default_input"), PYTHON_TENSOR_FLAG)) {
-      // cast tensor
-      tensor = py::cast<std::shared_ptr<Tensor>>(item.second.attr("default_input"));
-    }
-
-    if (tensor == nullptr) {
-      MS_LOG(EXCEPTION) << "Get default value for " << name << " failed";
-    }
-    (void)tensors->emplace(name, tensor);
-  }
-}
-
-bool ExecutorPy::AddDFGraph(const py::dict& init_params, const std::string& phase, const py::object& broadcast_params) {
-  FuncGraphPtr anf_graph = info_[phase]->func_graph;
-  DfGraphConvertor convertor(anf_graph);
-
-  size_t pos = phase.find('.');
-  std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1));
-  std::string phase_prefix = phase.substr(0, pos);
-
-  if (phase_prefix == "export") {
-    MS_LOG(INFO) << "Set DfGraphConvertor training : false";
-    convertor.set_training(false);
-  }
-
-  TensorOrderMap init_tensors{};
-  ConvertObjectToTensors(init_params, &init_tensors);
-  (void)convertor.ConvertAllNode().InitParam(init_tensors).BuildGraph();
-
-  if (broadcast_params != py::none()) {
-    if (!py::isinstance<py::dict>(broadcast_params)) {
-      MS_LOG(ERROR) << "Invalid broadcast params, it must be py::dict type";
-      return false;
-    }
-    py::dict broadcast = broadcast_params.cast<py::dict>();
-    if (broadcast.empty()) {
-      (void)convertor.GenerateBroadcastGraph(init_tensors);
-    } else {
-      TensorOrderMap broadcast_tensors{};
-      ConvertObjectToTensors(broadcast, &broadcast_tensors);
-      (void)convertor.GenerateBroadcastGraph(broadcast_tensors);
-    }
-    MS_LOG(INFO) << "Generate broadcast graph with params and broadcast_empty is " << broadcast.empty();
-  }
-
-  (void)convertor.GenerateCheckpointGraph();
-  if (convertor.ErrCode() != 0) {
-    DfGraphManager::GetInstance().ClearGraph();
-    MS_LOG(ERROR) << "convert df graph failed, err:" << convertor.ErrCode();
-    return false;
-  }
-
-  if (MsContext::GetInstance()->save_graphs_flag()) {
-    convertor.DrawComputeGraph(GetFilePathName("ge_graph.dot"));                      // for debug
-    convertor.DrawInitGraph(GetFilePathName("init_graph.dot"));                       // for debug
-    convertor.DrawSaveCheckpointGraph(GetFilePathName("save_checkpoint_graph.dot"));  // for debug
-  }
-  std::string init_graph = "init_subgraph." + net_id;
-  std::string checkpoint_name = "save." + net_id;
-  if (phase.find("train") != std::string::npos) {
-    (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph(), {{"ge.exec.variable_acc", "1"}});
-  } else {
-    (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph());
-  }
-  (void)DfGraphManager::GetInstance().AddGraph(init_graph, convertor.GetInitGraph());
-  (void)DfGraphManager::GetInstance().AddGraph(BROADCAST_GRAPH_NAME, convertor.GetBroadcastGraph());
-  Status ret = DfGraphManager::GetInstance().AddGraph(checkpoint_name, convertor.GetSaveCheckpointGraph());
-  if (ret == Status::SUCCESS) {
-    DfGraphManager::GetInstance().SetAnfGraph(checkpoint_name, anf_graph);
-  }
-
-  return true;
-}
-
-FuncGraphPtr ExecutorPy::BuildDFGraph(const py::dict& init_params, const std::string& phase,
-                                      const py::object& broadcast_params) {
-  if (info_.count(phase) == 0) {
-    MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase);
-  }
-  FuncGraphPtr anf_graph = info_[phase]->func_graph;
-
-  if (MsContext::GetInstance()->save_graphs_flag()) {
-    draw::Draw(GetFilePathName("anf_graph.dot"), anf_graph);  // for debug
-    DumpIR(GetFilePathName("anf_graph.ir"), anf_graph, true);
-  }
-
-  if (!AddDFGraph(init_params, phase, broadcast_params)) {
-    MS_LOG(ERROR) << "GenConvertor failed";
-    return nullptr;
-  }
-
-#if ENABLE_TRAIN
-  (void)setenv("GE_TRAIN", "1", 1);
-#else
-  (void)setenv("GE_TRAIN", "0", 1);
+#if ENABLE_GE
+  RunGEInitGraph(init_params, phase);
 #endif
-
-  if (CreateSessionAndGraphRunner(static_cast<bool>(ENABLE_TRAIN)) != Status::SUCCESS) {
-    MS_LOG(ERROR) << "Create GE Session or GraphRunner failed.";
-    return nullptr;
-  }
-
-  return anf_graph;
 }

 bool InitExecDataset(const std::string& queue_name, int64_t iter_num, int64_t batch_size,
@ -1156,47 +669,16 @@ bool InitExecDataset(const std::string& queue_name, int64_t iter_num, int64_t ba
  std::string name = MsContext::GetInstance()->backend_policy();
  if (name == kMsConvert || name == kMsVm) {
    return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes);
-  } else {
-    return InitExecDatasetGe(queue_name, iter_num, batch_size, types, shapes, input_indexes, phase);
  }
-}
-
-bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size,
-                       const std::vector<TypePtr>& types, const std::vector<std::vector<int64_t>>& shapes,
-                       const std::vector<int64_t>& input_indexes, const std::string& phase) {
-  // Convert types to GE types and TF types
-  std::vector<int64_t> ge_types;
-  (void)std::transform(types.begin(), types.end(), std::back_inserter(ge_types), [](const TypePtr& i) -> int64_t {
-    return transform::TransformUtil::ConvertDataType(i->type_id());
-  });
-
-  ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_GRAPH_MODE);
-  ConfigManager::GetInstance().set_iter_num(size);
-  ConfigManager::GetInstance().set_dataset_phase(phase);
-
-  DatasetGraphParam param(queue_name, size, batch_size, ge_types, shapes, input_indexes);
-  ConfigManager::GetInstance().set_dataset_param(param);
-
-  if (transform::BuildDatasetGraph(param, phase) != transform::SUCCESS) {
-    MS_LOG(ERROR) << "Build dateset graph failed.";
-    return false;
-  }
-
-#if ENABLE_TRAIN
-  (void)setenv("GE_TRAIN", "1", 1);
+#if ENABLE_GE
+  return InitExecDatasetGe(queue_name, iter_num, batch_size, types, shapes, input_indexes, phase);
 #else
-  (void)setenv("GE_TRAIN", "0", 1);
-#endif
-
-  if (CreateSessionAndGraphRunner(static_cast<bool>(ENABLE_TRAIN)) != Status::SUCCESS) {
-    MS_LOG(ERROR) << "Create GE Session or GraphRunner failed.";
-    return false;
+  std::string backend = MsContext::GetInstance()->backend_policy();
+  if (backend == "ge") {
+    return true;
  }
-
-  MS_LOG(INFO) << "DoExecNonInputGraph:" << phase;
-  DoExecNonInputGraph(phase);
-
-  return true;
+#endif
+  return false;
 }

 bool InitExecDatasetVm(const std::string& queue_name, int64_t size, int64_t batch_size,
@ -1259,25 +741,6 @@ bool InitExecDatasetVm(const std::string& queue_name, int64_t size, int64_t batc
  return true;
 }

-void InitGe() {
-  // set python env flag
-  mindspore::parse::python_adapter::set_python_env_flag(true);
-  // open tsd before ge initialize
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  if (!ms_context->OpenTsd()) {
-    MS_LOG(EXCEPTION) << "open tsd failed";
-  }
-  (void)ms_context->InitGe();
-}
-
-void FinalizeGe() {
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  (void)context_ptr->FinalizeGe();
-  (void)context_ptr->CloseTsd();
-}
-
 void ResetOpId() { mindspore::id_generator::reset_id(); }

 void InitHccl() {
@ -1309,24 +772,57 @@ void FinalizeHccl() {
  device::KernelRuntimeManager::Instance().ClearRuntimeResource();
 #endif
 }
-void ExportDFGraph(const std::string& file_name, const std::string&, const std::string& phase) {
-  MS_LOG(DEBUG) << "ExportGraph Begin";
-  transform::DfGraphWrapperPtr wrap_ptr = DfGraphManager::GetInstance().GetGraphByName(phase);
-  if (wrap_ptr == nullptr) {
-    MS_LOG(ERROR) << "Get graph form DfGraphManager failed!";
-    return;
-  }

-  transform::DfGraphPtr ge_graph = wrap_ptr->graph_ptr_;
-  if (nullptr == ge_graph) {
-    MS_LOG(ERROR) << "The export graph is null";
-    return;
-  }
-
-  (void)ge_graph->SaveToFile(file_name);
-
-  MS_LOG(DEBUG) << "ExportGraph End";
+void ExportGraph(const std::string& file_name, const std::string&, const std::string& phase) {
+#if (ENABLE_GE || ENABLE_D)
+  ExportDFGraph(file_name, phase);
+#endif
+  MS_LOG(WARNING) << "In ut test no export_graph";
 }

+void ReleaseGeTsd() {
+  auto context_ptr = MsContext::GetInstance();
+  if (context_ptr != nullptr) {
+    (void)context_ptr->FinalizeGe(true);
+    (void)context_ptr->CloseTsd(true);
+  }
+}
+
+void InitGe() {
+  // set python env flag
+  mindspore::parse::python_adapter::set_python_env_flag(true);
+  // open tsd before ge initialize
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  if (!ms_context->OpenTsd()) {
+    MS_LOG(EXCEPTION) << "open tsd failed";
+  }
+  (void)ms_context->InitGe();
+}
+
+void FinalizeGe() {
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  (void)context_ptr->FinalizeGe();
+  (void)context_ptr->CloseTsd();
+}
+
+void ClearResAtexit() {
+  MS_LOG(DEBUG) << "Pipeline clear all resource";
+  device::KernelRuntimeManager::Instance().ClearRuntimeResource();
+
+  ad::g_k_prims.clear();
+
+  abstract::ClearPrimEvaluatorMap();
+  compile::ClearConvertCache();
+  pipeline::GetMethodMap().clear();
+  pipeline::ExecutorPy::ClearRes();
+#ifdef ENABLE_GE
+  transform::DfGraphManager::GetInstance().ClearGraph();
+  transform::DfGraphConvertor::get_adpt_map().clear();
+#endif
+  ReleaseGeTsd();
+  parse::python_adapter::ResetPythonScope();
+}
 }  // namespace pipeline
 }  // namespace mindspore
--- a/mindspore/ccsrc/pipeline/pipeline.h
+++ b/mindspore/ccsrc/pipeline/pipeline.h
@ -30,6 +30,7 @@
 #include "pipeline/action.h"
 #include "vm/segment_runner.h"
 #include "vm/transform.h"
+#include "pipeline/base.h"

 namespace mindspore {
 extern const char kMsConvert[];
@ -55,14 +56,6 @@ class Pipeline {
  std::vector<ActionItem> actions_;
 };

-struct ExecutorInfo {
-  FuncGraphPtr func_graph;
-  ResourcePtr resource;
-  std::size_t arg_list_size;
-};
-
-using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>;
-
 // A function pipeline.
 class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
 public:
@ -80,11 +73,7 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  bool CompileInner(const py::object& obj, const py::tuple& args, const py::object& phase, bool use_vm);
  bool Compile(const py::object& obj, const py::tuple& args, const py::object& phase, bool use_vm);

-  // for graph mode
-  py::object ExecDFGraph(const py::tuple& args, const std::string& phase = "train");
-
  void ProcessVmArg(const py::tuple& args, const std::string& phase, VectorRef* arg_list);
-  void ProcessGeArg(const py::tuple& args, const std::string& phase, std::vector<tensor::TensorPtr>* inputs);

  // for pynative mode when use_vm is on
  py::object Run(const py::tuple& args, const py::object& phase);
@ -95,9 +84,8 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
  compile::VmEvalFuncPtr GetVmEvalFunc(const std::string& phase);
  bool HasCompiled(const std::string& phase) const;

-  bool AddDFGraph(const py::dict& init_params, const std::string& phase, const py::object& broadcast_params);
-  FuncGraphPtr BuildDFGraph(const py::dict& init_params, const std::string& phase,
-                            const py::object& broadcast_params = {});
+  FuncGraphPtr BuildGraph(const py::dict& init_params, const std::string& phase,
+                          const py::object& broadcast_params = {});
  void RunInitGraph(const py::dict& init_params, const std::string& phase);
  py::dict GetParameterLayout(const std::string& phase);
  py::dict GetCNodeStrategy(const std::string& phase);
@ -122,32 +110,29 @@ using ExecutorPyPtr = std::shared_ptr<ExecutorPy>;
 py::tuple GenerateKey(const std::string& name, const std::unordered_map<std::string, py::object>& defaults);
 py::bool_ VerifyInputSignature(const py::list input_signature, const py::tuple inputs);

-void SetGeOption(const std::map<std::string, std::string>& options);
 bool InitDistribute(const std::map<std::string, std::string>& options);

 void ResetOpId();
-void InitGe();
-void FinalizeGe();
 void InitHccl();
 void FinalizeHccl();
+void InitGe();
+void FinalizeGe();
+
+void ClearResAtexit();
+void ReleaseGeTsd();
+
+void ExportGraph(const std::string& file_name, const std::string&, const std::string& phase);

 // init and exec dataset sub graph
 bool InitExecDataset(const std::string& queue_name, int64_t iter_num, int64_t batch_size,
                     const std::vector<TypePtr>& types, const std::vector<std::vector<int64_t>>& shapes,
                     const std::vector<int64_t>& input_indexes, const std::string& phase);

-// init and exec dataset sub graph for GE backend
-bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size,
-                       const std::vector<TypePtr>& types, const std::vector<std::vector<int64_t>>& shapes,
-                       const std::vector<int64_t>& input_indexes, const std::string& phase);
-
 // Build and run dataset subgraph for ms backend
 bool InitExecDatasetVm(const std::string& queue_name, int64_t size, int64_t batch_size,
                       const std::vector<TypePtr>& types, const std::vector<std::vector<int64_t>>& shapes,
                       const std::vector<int64_t>& input_indexes);

-void ExportDFGraph(const std::string& file_name, const std::string&, const std::string& phase);
-
 }  // namespace pipeline
 }  // namespace mindspore

--- a/mindspore/ccsrc/pipeline/pipeline_ge.cc
+++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc
@ -0,0 +1,545 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pipeline/pipeline_ge.h"
+
+#include <sstream>
+#include <map>
+#include <unordered_map>
+#include <cstdlib>
+#include <algorithm>
+
+#include "debug/anf_ir_dump.h"
+#include "ir/meta_tensor.h"
+#include "transform/convert.h"
+#include "transform/df_graph_manager.h"
+#include "transform/graph_builder.h"
+#include "transform/graph_runner.h"
+#include "debug/draw.h"
+#include "pipeline/static_analysis/abstract_value.h"
+
+namespace mindspore {
+namespace pipeline {
+using Tensor = mindspore::tensor::Tensor;
+using MetaTensor = mindspore::tensor::MetaTensor;
+using TensorOrderMap = std::map<std::string, std::shared_ptr<Tensor>>;
+using mindspore::abstract::AbstractTensor;
+using mindspore::abstract::AbstractTuple;
+using mindspore::abstract::AbstractTuplePtr;
+using mindspore::transform::DfGraphConvertor;
+using mindspore::transform::DfGraphManager;
+using mindspore::transform::GeTensorPtr;
+using mindspore::transform::MeTensorPtr;
+using mindspore::transform::Status;
+using mindspore::transform::TransformUtil;
+
+void DoExecNonInputGraph(const std::string& phase) {
+  std::vector<GeTensorPtr> ge_tensors;
+  std::vector<GeTensorPtr> ge_outputs;
+  transform::RunOptions run_options;
+  run_options.name = phase;
+  auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
+
+  if (graph_runner == nullptr) {
+    MS_LOG(ERROR) << "Can not found GraphRunner";
+    return;
+  }
+  {
+    // Release GIL before calling into (potentially long-running) C++ code
+    py::gil_scoped_release release;
+    Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
+    if (ret != Status::SUCCESS) {
+      MS_LOG(ERROR) << "Exec graph:" << run_options.name << " failed";
+      return;
+    }
+  }
+}
+
+void SetGeOption(const std::map<std::string, std::string>& options) {
+  ConfigManager::GetInstance().set_ge_initialize_options(options);
+}
+
+Status CreateSessionAndGraphRunner(bool is_training = true) {
+  std::shared_ptr<ge::Session> sess = DfGraphManager::GetInstance().GetGeSession();
+  if (sess == nullptr) {
+    transform::SessionOptions options;
+    if (is_training) {
+      options["ge.trainFlag"] = "1";
+      options["ge.streamNum"] = "100";
+      options["ge.enabledLocalFmkop"] = "1";
+      options["ge.hcomParallel"] = "1";
+    } else {
+      options["ge.trainFlag"] = "0";
+    }
+
+    options["ge.enablePrintOpPass"] = "0";
+    sess = transform::GraphRunner::NewSession(options);
+    if (sess == nullptr) {
+      MS_LOG(ERROR) << "Init data graph failed, because of create Ge session failed";
+      return Status::FAILED;
+    } else {
+      DfGraphManager::GetInstance().SetGeSession(sess);
+    }
+  }
+
+  transform::GraphRunnerOptions options;
+  options.sess_ptr = sess;
+  auto graph_runner = std::make_shared<transform::GraphRunner>(options);
+  if (graph_runner == nullptr) {
+    MS_LOG(ERROR) << "Create new graph runner failed";
+    return Status::FAILED;
+  } else {
+    DfGraphManager::GetInstance().SetGraphRunner(graph_runner);
+  }
+
+  return Status::SUCCESS;
+}
+
+bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size,
+                       const std::vector<TypePtr>& types, const std::vector<std::vector<int64_t>>& shapes,
+                       const std::vector<int64_t>& input_indexes, const std::string& phase) {
+  std::vector<int64_t> ge_types;
+  (void)std::transform(types.begin(), types.end(), std::back_inserter(ge_types), [](const TypePtr& i) -> int64_t {
+    return transform::TransformUtil::ConvertDataType(i->type_id());
+  });
+
+  ConfigManager::GetInstance().set_dataset_mode(DatasetMode::DS_GRAPH_MODE);
+  ConfigManager::GetInstance().set_iter_num(size);
+  ConfigManager::GetInstance().set_dataset_phase(phase);
+
+  DatasetGraphParam param(queue_name, size, batch_size, ge_types, shapes, input_indexes);
+  ConfigManager::GetInstance().set_dataset_param(param);
+
+  if (transform::BuildDatasetGraph(param, phase) != transform::SUCCESS) {
+    MS_LOG(ERROR) << "Build dateset graph failed.";
+    return false;
+  }
+
+#if ENABLE_TRAIN
+  (void)setenv("GE_TRAIN", "1", 1);
+#else
+  (void)setenv("GE_TRAIN", "0", 1);
+#endif
+
+  if (CreateSessionAndGraphRunner(static_cast<bool>(ENABLE_TRAIN)) != Status::SUCCESS) {
+    MS_LOG(ERROR) << "Create GE Session or GraphRunner failed.";
+    return false;
+  }
+
+  MS_LOG(INFO) << "DoExecNonInputGraph:" << phase;
+  DoExecNonInputGraph(phase);
+
+  return true;
+}
+
+void ConvertObjectToTensors(const py::dict& dict, TensorOrderMap* const tensors) {
+  for (auto item : dict) {
+    if ((!py::isinstance<py::str>(item.first))) {
+      MS_LOG(WARNING) << "Type of key of py_dict is not string, ignore it.";
+      continue;
+    }
+    std::shared_ptr<Tensor> tensor;
+    std::string name = py::cast<std::string>(item.first);
+    if (py::isinstance<py::float_>(item.second.attr("default_input"))) {
+      // convert float to tensor with shape([1])
+      tensor = std::make_shared<Tensor>(kNumberTypeFloat32, std::vector<int>({1}));
+      *(static_cast<float*>(tensor->data_c(true))) = py::cast<float>(item.second.attr("default_input"));
+    } else if (py::isinstance<py::int_>(item.second.attr("default_input"))) {
+      // convert int to tensor with shape([1])
+      tensor = std::make_shared<Tensor>(kNumberTypeInt32, std::vector<int>({1}));
+      *(static_cast<float*>(tensor->data_c(true))) = py::cast<float>(item.second.attr("default_input"));
+    } else if (py::hasattr(item.second.attr("default_input"), PYTHON_TENSOR_FLAG)) {
+      // cast tensor
+      tensor = py::cast<std::shared_ptr<Tensor>>(item.second.attr("default_input"));
+    }
+
+    if (tensor == nullptr) {
+      MS_LOG(EXCEPTION) << "Get default value for " << name << " failed";
+    }
+    (void)tensors->emplace(name, tensor);
+  }
+}
+
+bool AddDFGraph(const std::map<std::string, ExecutorInfoPtr>& info, const py::dict& init_params,
+                const std::string& phase, const py::object& broadcast_params) {
+  FuncGraphPtr anf_graph = info.at(phase)->func_graph;
+  DfGraphConvertor convertor(anf_graph);
+
+  size_t pos = phase.find('.');
+  std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1));
+  std::string phase_prefix = phase.substr(0, pos);
+
+  if (phase_prefix == "export") {
+    MS_LOG(INFO) << "Set DfGraphConvertor training : false";
+    convertor.set_training(false);
+  }
+
+  TensorOrderMap init_tensors{};
+  ConvertObjectToTensors(init_params, &init_tensors);
+  (void)convertor.ConvertAllNode().InitParam(init_tensors).BuildGraph();
+
+  if (broadcast_params != py::none()) {
+    if (!py::isinstance<py::dict>(broadcast_params)) {
+      MS_LOG(ERROR) << "Invalid broadcast params, it must be py::dict type";
+      return false;
+    }
+    py::dict broadcast = broadcast_params.cast<py::dict>();
+    if (broadcast.empty()) {
+      (void)convertor.GenerateBroadcastGraph(init_tensors);
+    } else {
+      TensorOrderMap broadcast_tensors{};
+      ConvertObjectToTensors(broadcast, &broadcast_tensors);
+      (void)convertor.GenerateBroadcastGraph(broadcast_tensors);
+    }
+    MS_LOG(INFO) << "Generate broadcast graph with params and broadcast_empty is " << broadcast.empty();
+  }
+
+  (void)convertor.GenerateCheckpointGraph();
+  if (convertor.ErrCode() != 0) {
+    DfGraphManager::GetInstance().ClearGraph();
+    MS_LOG(ERROR) << "convert df graph failed, err:" << convertor.ErrCode();
+    return false;
+  }
+
+  if (MsContext::GetInstance()->save_graphs_flag()) {
+    convertor.DrawComputeGraph(GetFilePathName("ge_graph.dot"));                      // for debug
+    convertor.DrawInitGraph(GetFilePathName("init_graph.dot"));                       // for debug
+    convertor.DrawSaveCheckpointGraph(GetFilePathName("save_checkpoint_graph.dot"));  // for debug
+  }
+  std::string init_graph = "init_subgraph." + net_id;
+  std::string checkpoint_name = "save." + net_id;
+  if (phase.find("train") != std::string::npos) {
+    (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph(), {{"ge.exec.variable_acc", "1"}});
+  } else {
+    (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph());
+  }
+  (void)DfGraphManager::GetInstance().AddGraph(init_graph, convertor.GetInitGraph());
+  (void)DfGraphManager::GetInstance().AddGraph(checkpoint_name, convertor.GetSaveCheckpointGraph());
+  (void)DfGraphManager::GetInstance().AddGraph(BROADCAST_GRAPH_NAME, convertor.GetBroadcastGraph());
+
+  DfGraphManager::GetInstance().SetAnfGraph(checkpoint_name, anf_graph);
+
+  return true;
+}
+
+FuncGraphPtr BuildDFGraph(const std::map<std::string, ExecutorInfoPtr>& info, const py::dict& init_params,
+                          const std::string& phase, const py::object& broadcast_params) {
+  if (info.count(phase) == 0) {
+    MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase);
+  }
+  FuncGraphPtr anf_graph = info.at(phase)->func_graph;
+
+  if (MsContext::GetInstance()->save_graphs_flag()) {
+    draw::Draw(GetFilePathName("anf_graph.dot"), anf_graph);  // for debug
+    DumpIR(GetFilePathName("anf_graph.ir"), anf_graph, true);
+  }
+
+  if (!AddDFGraph(info, init_params, phase, broadcast_params)) {
+    MS_LOG(ERROR) << "GenConvertor failed";
+    return nullptr;
+  }
+
+#if ENABLE_TRAIN
+  (void)setenv("GE_TRAIN", "1", 1);
+#else
+  (void)setenv("GE_TRAIN", "0", 1);
+#endif
+
+  if (CreateSessionAndGraphRunner(static_cast<bool>(ENABLE_TRAIN)) != Status::SUCCESS) {
+    MS_LOG(ERROR) << "Create GE Session or GraphRunner failed.";
+    return nullptr;
+  }
+
+  return anf_graph;
+}
+
+void RunGEInitGraph(const py::dict& init_params, const std::string& phase) {
+  MS_LOG(DEBUG) << "ExecInitGraph start.";
+  TensorOrderMap inputs_with_name{};
+  ConvertObjectToTensors(init_params, &inputs_with_name);
+  std::vector<tensor::TensorPtr> inputs;
+  (void)std::transform(inputs_with_name.begin(), inputs_with_name.end(), std::back_inserter(inputs),
+                       [](const std::pair<std::string, tensor::TensorPtr>& item) { return item.second; });
+
+  std::vector<GeTensorPtr> ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW);
+  if (ge_tensors.size() != inputs.size()) {
+    MS_LOG(ERROR) << "Args convert to ge tensor error.";
+    return;
+  }
+  MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size() << ".";
+
+  std::vector<GeTensorPtr> ge_outputs;
+  transform::RunOptions run_options;
+
+  run_options.name = phase;
+  if (DfGraphManager::GetInstance().GetGraphByName(phase) == nullptr) {
+    MS_LOG(WARNING) << "Can not find " << phase << " sub graph, don't need data init subgraph in INFER mode.";
+    return;
+  }
+  auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
+  if (graph_runner == nullptr) {
+    MS_LOG(EXCEPTION) << "Can not found GraphRunner.";
+  }
+  {
+    // Release GIL before calling into (potentially long-running) C++ code
+    py::gil_scoped_release release;
+    Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
+    if (ret != Status::SUCCESS) {
+      MS_LOG(EXCEPTION) << "Exec " << phase << " graph failed.";
+    }
+
+    MS_LOG(INFO) << "Exec " << phase << " graph success.";
+
+    if ((ConfigManager::GetInstance().parallel_strategy() == ParallelStrategy::DISTRIBUTION) &&
+        (DfGraphManager::GetInstance().GetGraphByName(BROADCAST_GRAPH_NAME) != nullptr)) {
+      run_options.name = BROADCAST_GRAPH_NAME;
+      ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
+      if (ret != Status::SUCCESS) {
+        MS_LOG(EXCEPTION) << "Exec BROADCAST_GRAPH_NAME failed.";
+      }
+      MS_LOG(INFO) << "Exec broadcast graph success.";
+    }
+  }
+}
+
+py::object ExtractGeneralCnodeRet(const AbstractBasePtr& cnode_data, const py::tuple& data, size_t* count) {
+  MS_EXCEPTION_IF_NULL(cnode_data);
+  if (*count >= data.size()) {
+    MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
+                      << " less than the number of elements required. ";
+  }
+
+  if (cnode_data->isa<AbstractTensor>()) {
+    BaseShapePtr shape = cnode_data->BuildShape();
+    auto shape_act = shape->cast<abstract::ShapePtr>()->shape();
+    Tensor tensor_exp = py::cast<Tensor>(data[*count]);
+    if (shape_act != tensor_exp.shape()) {
+      MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as "
+                           "the shape of the tensor derived from ME.";
+    }
+    return data[(*count)++];
+  }
+
+  if (!cnode_data->isa<AbstractTuple>()) {
+    MS_LOG(EXCEPTION) << "The output of operator in the final anf graph could "
+                      << "only be a tensor or a tuple of tensor, but got " << cnode_data->BuildValue()->ToString()
+                      << ".";
+  }
+  auto data_tp = cnode_data->cast<AbstractTuplePtr>();
+  auto elements = data_tp->elements();
+  size_t size = data_tp->size();
+  py::tuple tp = py::tuple(size);
+  for (size_t i = 0; i < size; i++) {
+    tp[i] = ExtractGeneralCnodeRet(elements[i], data, count);
+  }
+  return std::move(tp);
+}
+
+py::object StructureOutput(const AnfNodePtr& output_node, const py::tuple& data, size_t* count) {
+  MS_EXCEPTION_IF_NULL(output_node);
+
+  if (output_node->isa<ValueNode>()) {
+    return ValuePtrToPyData(GetValueNode(output_node));
+  }
+
+  if (*count >= data.size()) {
+    MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
+                      << " less than the number of elements required. ";
+  }
+  if (output_node->isa<Parameter>()) {
+    return data[(*count)++];
+  }
+
+  auto output_c = output_node->cast<CNodePtr>();
+  if (output_c == nullptr) {
+    MS_LOG(EXCEPTION) << "The final anf graph could only have constant, parameter, and operator, but got "
+                      << output_node->ToString();
+  }
+
+  if (output_c->IsApply(prim::kPrimMakeTuple)) {
+    auto input_list = output_c->inputs();
+    size_t size = input_list.size();
+    py::tuple tp = py::tuple(size - 1);
+    for (size_t i = 1; i < size; i++) {
+      tp[i - 1] = StructureOutput(input_list[i], data, count);
+    }
+    return std::move(tp);
+  }
+  if (output_c->IsApply(prim::kPrimDepend)) {
+    return StructureOutput(output_c->input(1), data, count);
+  }
+
+  return ExtractGeneralCnodeRet(output_c->abstract(), data, count);
+}
+
+std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr& graph, const std::vector<MeTensorPtr>& inputs,
+                                        const std::string& phase) {
+  std::vector<GeTensorPtr> ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW);
+  if (ge_tensors.size() != inputs.size()) {
+    MS_LOG(ERROR) << "args convert to ge tensor error";
+    return nullptr;
+  }
+
+  std::vector<GeTensorPtr> ge_outputs;
+  transform::RunOptions run_options;
+
+  run_options.name = phase;
+
+  auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
+
+  if (graph_runner == nullptr) {
+    MS_LOG(ERROR) << "Can not found GraphRunner";
+    return nullptr;
+  }
+
+  {
+    // Release GIL before calling into (potentially long-running) C++ code
+    py::gil_scoped_release release;
+    MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size();
+    Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
+    MS_LOG(DEBUG) << "Run graph finish, outputs size is: " << ge_outputs.size();
+    if (ret != Status::SUCCESS) {
+      MS_LOG(ERROR) << "Exec graph failed";
+      return nullptr;
+    }
+  }
+
+  std::vector<MeTensorPtr> me_outputs = TransformUtil::ConvertGeTensors(ge_outputs);
+  if (me_outputs.size() != ge_outputs.size()) {
+    MS_LOG(ERROR) << "Convert output Ge tensor to Me tensor failed";
+  }
+
+  py::tuple outputs(me_outputs.size());
+  for (std::size_t i = 0; i < outputs.size(); i++) {
+    outputs[i] = *me_outputs[i];
+  }
+
+  std::shared_ptr<py::object> ret = nullptr;
+
+#ifdef ENABLE_GE
+  AnfNodePtr root = graph->get_return();
+  MS_EXCEPTION_IF_NULL(root);
+  AbstractBasePtr output = root->abstract();
+  size_t count = 0;
+  py::object oj = StructureOutput(output, outputs, &count);
+  ret = std::make_shared<py::object>(oj);
+#else
+  if (outputs.size() == 1) {
+    ret = std::make_shared<py::object>(outputs[0]);
+  } else {
+    ret = std::make_shared<py::object>(outputs);
+  }
+#endif
+
+  return ret;
+}
+
+void ProcessGeArg(const std::map<std::string, ExecutorInfoPtr>& info, const py::tuple& args, const std::string& phase,
+                  std::vector<tensor::TensorPtr>* inputs) {
+  // check the arg and use the ExecutorPy args
+  std::size_t size = args.size();
+
+  if (info.count(phase) == 0) {
+    MS_LOG(EXCEPTION) << "no phase in executor:" << GetPhasePrefix(phase);
+  }
+
+  auto arg_size = info.at(phase)->arg_list_size;
+  if (size != arg_size) {
+    MS_LOG(EXCEPTION) << "The real arg num : size = " << size << ". graph_arg_size = " << arg_size;
+  }
+
+  // process the first args of tensor
+  // only in Dataset Feed Mode, fp_bp graph need input tensors
+  if (ConfigManager::GetInstance().dataset_mode() == DS_FEED_MODE) {
+    for (std::size_t i = 0; i < size; i++) {
+      ValuePtr converted = nullptr;
+      bool succ = parse::ConvertData(args[i], &converted);
+      if (!succ) {
+        MS_LOG(EXCEPTION) << "args convert error";
+      }
+      if (converted->isa<tensor::Tensor>()) {
+        (*inputs).push_back(converted->cast<tensor::TensorPtr>());
+      } else {
+        MS_LOG(EXCEPTION) << "args, " << converted->ToString() << " is not tensor";
+      }
+    }
+  }
+}
+
+py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr>& info, const py::tuple& args,
+                       const std::string& phase) {
+  std::string phase_prefix = GetPhasePrefix(phase);
+
+  if (phase_prefix == "save") {
+    DoExecNonInputGraph(phase);
+    ConfigManager::GetInstance().ResetConfig();
+    return py::none();
+  }
+
+  if (info.count(phase) == 0) {
+    MS_LOG(EXCEPTION) << "has no phase:" << phase;
+  }
+
+  FuncGraphPtr anf_graph = info.at(phase)->func_graph;
+
+#if (!defined ENABLE_GE) || (defined ENABLE_INFER)
+  // Now don't use the graph because the exec ge function don't take effect
+  MS_EXCEPTION_IF_NULL(info.at(phase)->func_graph);
+  if (ENABLE_TRAIN != info.at(phase)->func_graph->flags()["training"]) {
+    MS_LOG(ERROR) << "Graph training mode mismatch mode of libraries";
+    ConfigManager::GetInstance().ResetConfig();
+    return py::none();
+  }
+#endif
+
+  std::shared_ptr<py::object> ret_val = std::make_shared<py::object>();
+  // We will not execute graph when output is constant or just input itself.
+  if (IsGraphOutputValueNodeOrParameter(info.at(phase)->func_graph->output(), args, ret_val)) {
+    ConfigManager::GetInstance().ResetConfig();
+    return *ret_val;
+  }
+
+  std::vector<tensor::TensorPtr> inputs;
+  ProcessGeArg(info, args, phase, &inputs);
+
+  std::shared_ptr<py::object> ret = DoExecGraph(anf_graph, inputs, phase);
+  ConfigManager::GetInstance().ResetConfig();
+  if (ret != nullptr) {
+    return *ret;
+  } else {
+    MS_LOG(EXCEPTION) << "exec graph failed";
+  }
+}
+void ExportDFGraph(const std::string& file_name, const std::string& phase) {
+  MS_LOG(DEBUG) << "ExportGraph Begin";
+  transform::DfGraphWrapperPtr wrap_ptr = DfGraphManager::GetInstance().GetGraphByName(phase);
+  if (wrap_ptr == nullptr) {
+    MS_LOG(ERROR) << "Get graph form DfGraphManager failed!";
+    return;
+  }
+
+  transform::DfGraphPtr ge_graph = wrap_ptr->graph_ptr_;
+  if (nullptr == ge_graph) {
+    MS_LOG(ERROR) << "The export graph is null";
+    return;
+  }
+
+  (void)ge_graph->SaveToFile(file_name);
+
+  MS_LOG(DEBUG) << "ExportGraph End";
+}
+}  // namespace pipeline
+}  // namespace mindspore
--- a/mindspore/ccsrc/pipeline/pipeline_ge.h
+++ b/mindspore/ccsrc/pipeline/pipeline_ge.h
@ -0,0 +1,57 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PIPELINE_PIPELINE_GE_H_
+#define MINDSPORE_CCSRC_PIPELINE_PIPELINE_GE_H_
+
+#include <vector>
+#include <utility>
+#include <string>
+#include <memory>
+#include <unordered_map>
+#include <map>
+#include <mutex>
+
+#include "pybind11/pybind11.h"
+#include "pipeline/base.h"
+#include "operator/ops.h"
+
+namespace mindspore {
+namespace pipeline {
+
+namespace py = pybind11;
+
+void SetGeOption(const std::map<std::string, std::string>& options);
+
+void RunGEInitGraph(const py::dict& init_params, const std::string& phase);
+
+py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr>& info, const py::tuple& args,
+                       const std::string& phase = "train");
+
+FuncGraphPtr BuildDFGraph(const std::map<std::string, ExecutorInfoPtr>& info, const py::dict& init_params,
+                          const std::string& phase, const py::object& broadcast_params = {});
+
+// init and exec dataset sub graph for GE backend
+bool InitExecDatasetGe(const std::string& queue_name, int64_t size, int64_t batch_size,
+                       const std::vector<TypePtr>& types, const std::vector<std::vector<int64_t>>& shapes,
+                       const std::vector<int64_t>& input_indexes, const std::string& phase);
+
+void ExportDFGraph(const std::string& file_name, const std::string& phase);
+
+}  // namespace pipeline
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PIPELINE_PIPELINE_GE_H_
--- a/mindspore/ccsrc/pipeline/resource.cc
+++ b/mindspore/ccsrc/pipeline/resource.cc
@ -25,19 +25,13 @@
 #include "pipeline/parse/data_converter.h"
 #include "operator/ops.h"
 #include "utils/graph_utils.h"
-#include "transform/convert.h"
 #include "optimizer/ad/dfunctor.h"
 #include "vm/segment_runner.h"
-#include "utils/context/ms_context.h"
-#include "transform/df_graph_manager.h"
-#include "device/kernel_runtime_manager.h"

 namespace mindspore {
 // namespace to support opmap definition
 namespace pipeline {

-using MethodMap = std::unordered_map<int, std::unordered_map<std::string, Any>>;
-
 MethodMap& GetMethodMap() {
  static MethodMap method_map = {{kObjectTypeString,
                                  {
@ -255,28 +249,5 @@ void Resource::Clean() {
  trace::ClearTraceStack();
  is_cleaned_ = true;
 }
-
-void ReleaseGeTsd() {
-  auto context_ptr = MsContext::GetInstance();
-  if (context_ptr != nullptr) {
-    (void)context_ptr->FinalizeGe(true);
-    (void)context_ptr->CloseTsd(true);
-  }
-}
-
-void ClearResAtexit() {
-  MS_LOG(DEBUG) << "pipeline clear all resource";
-  device::KernelRuntimeManager::Instance().ClearRuntimeResource();
-  transform::DfGraphManager::GetInstance().ClearGraph();
-  ad::g_k_prims.clear();
-
-  abstract::ClearPrimEvaluatorMap();
-  compile::ClearConvertCache();
-  transform::DfGraphConvertor::get_adpt_map().clear();
-  pipeline::GetMethodMap().clear();
-  pipeline::ExecutorPy::ClearRes();
-
-  ReleaseGeTsd();
-}
 }  // namespace pipeline
 }  // namespace mindspore
--- a/mindspore/ccsrc/pipeline/resource.h
+++ b/mindspore/ccsrc/pipeline/resource.h
@ -44,6 +44,10 @@ const char kOutput[] = "output";

 class InferenceResource;

+using MethodMap = std::unordered_map<int, std::unordered_map<std::string, Any>>;
+
+MethodMap& GetMethodMap();
+
 class ResourceBase {
 public:
  ResourceBase() { manager_ = MakeManager(); }
@ -110,9 +114,6 @@ class Resource : public ResourceBase {

 using ResourcePtr = std::shared_ptr<pipeline::Resource>;

-void ClearResAtexit();
-void ReleaseGeTsd();
-
 }  // namespace pipeline
 }  // namespace mindspore

--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@ -21,7 +21,7 @@
 #include "pre_activate/ascend/ir_fission/bn_grad_split.h"
 #include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h"
 #include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h"
-#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h"
+#include "pre_activate/common/ir_fusion/allreduce_fusion.h"
 #include "pre_activate/ascend/ir_fusion/square_sum_fusion.h"
 #include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h"
 #include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h"
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
@ -237,11 +237,11 @@ CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::v

  std::vector<std::string> input_names;
  for (uint8_t i = 0; i < inputs_list.size(); i++) {
-    input_names.emplace_back("input" + to_string(i));
+    input_names.emplace_back("input" + std::to_string(i));
  }
  std::vector<std::string> output_names;
  for (uint8_t i = 0; i < outputs_list.size(); i++) {
-    output_names.emplace_back("output" + to_string(i));
+    output_names.emplace_back("output" + std::to_string(i));
  }

  ValuePtr input_names_v = MakeValue(input_names);
--- a/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.cc
@ -13,7 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h"
+#include "pre_activate/common/ir_fusion/allreduce_fusion.h"

 #include <vector>
 #include <string>
--- a/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.h
+++ b/mindspore/ccsrc/pre_activate/common/ir_fusion/allreduce_fusion.h
@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ALLREDUCE_FUSION_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ALLREDUCE_FUSION_H_
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_IR_FUSION_ALLREDUCE_FUSION_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_IR_FUSION_ALLREDUCE_FUSION_H_
 #include <vector>

 #include "pre_activate/common/pass.h"
@ -46,4 +46,4 @@ class AllReduceFusion : public Pass {
 };
 }  // namespace opt
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_ALLREDUCE_FUSION_H_
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_IR_FUSION_ALLREDUCE_FUSION_H_
--- a/mindspore/ccsrc/predict/converter/kernel2ms.cc
+++ b/mindspore/ccsrc/predict/converter/kernel2ms.cc
@ -16,7 +16,7 @@

 #include "predict/converter/kernel2ms.h"
 #include <algorithm>
-#include "transform/convert.h"
+#include "ir/anf.h"
 #include "predict/converter/lite_model/op_attr_packer.h"
 #include "mindspore/ccsrc/operator/ops.h"

@ -135,7 +135,7 @@ void Kernel2Ms::GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr
  if (node->isa<CNode>()) {
    auto c_node = node->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(c_node);
-    std::string c_node_name = transform::GetCNodeFuncName(c_node);
+    std::string c_node_name = GetCNodeFuncName(c_node);
    if (c_node_name == prim::kPrimTupleGetItem->name()) {
      auto v_node = c_node->inputs()[kTupleGetItemIndex]->cast<ValueNodePtr>();
      MS_EXCEPTION_IF_NULL(v_node);
@ -321,7 +321,7 @@ bool Kernel2Ms::SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, con
  }
  for (const auto &input_node : kernel_graph_ptr->inputs()) {
    if (input_node->isa<Parameter>()) {
-      ParameterPtr pk_node = dynamic_pointer_cast<Parameter>(input_node);
+      ParameterPtr pk_node = std::dynamic_pointer_cast<Parameter>(input_node);
      TensorPtr device_tensor;
      if (convert_mode_ == kConvertCpuMode) {
        device_tensor = predict::utils::GetParaCpuTensor(input_node);
--- a/mindspore/ccsrc/pynative/base.h
+++ b/mindspore/ccsrc/pynative/base.h
@ -0,0 +1,67 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PYNATIVE_BASE_H_
+#define MINDSPORE_CCSRC_PYNATIVE_BASE_H_
+
+#include <vector>
+#include <utility>
+#include <string>
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "pybind11/pybind11.h"
+#include "ir/primitive.h"
+#include "pipeline/static_analysis/abstract_value.h"
+
+namespace mindspore {
+namespace pynative {
+
+namespace py = pybind11;
+
+enum PynativeStatusCode {
+  PYNATIVE_SUCCESS = 0,
+  PYNATIVE_OP_NOT_IMPLEMENTED_ERR = 1,
+  PYNATIVE_OP_INPUTS_ERR = 2,
+  PYNATIVE_OP_PARAMS_ERR = 3,
+  PYNATIVE_OP_ATTRS_ERR = 4,
+  PYNATIVE_GRAPH_MANAGER_ERR = 5,
+  PYNATIVE_GRAPH_GE_BUILD_ERR = 6,
+  PYNATIVE_GRAPH_GE_RUN_ERR = 7,
+  PYNATIVE_UNKNOWN_STATE = 0XFF
+};
+
+enum RunOpArgsEnum { PY_PRIM = 0, PY_NAME, PY_INPUTS, PY_INPUT_MASK, PY_ARGS_NUM };
+
+struct OpExecInfo {
+  PrimitivePyPtr py_primitive;
+  std::string op_name;
+  AbstractBasePtr abstract;
+
+  py::tuple op_inputs;
+  py::tuple inputs_mask;
+  py::dict op_attrs;
+};
+using OpExecInfoPtr = std::shared_ptr<OpExecInfo>;
+OpExecInfoPtr GenerateOpExecInfo(const py::args& args);
+
+const std::unordered_set<std::string> ignore_infer_prim = {"partial"};
+
+}  // namespace pynative
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PYNATIVE_BASE_H_
--- a/mindspore/ccsrc/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pynative/pynative_execute.cc
@ -29,16 +29,18 @@
 #include "pipeline/static_analysis/prim.h"
 #include "session/session_factory.h"

+#include "pynative/base.h"
+
+#ifdef ENABLE_GE
+#include "pynative/pynative_execute_ge.h"
+#endif
+
 const char SINGLE_OP_GRAPH[] = "single_op_graph";
 // primitive unable to infer value for constant input in pynative mode
-const std::unordered_set<std::string> ignore_infer_prim = {"partial"};
 const std::unordered_set<std::string> vm_operators = {"partial", "depend"};

 namespace mindspore {
 namespace pynative {
-using transform::GraphRunner;
-using transform::GraphRunnerOptions;
-using transform::OperatorPtr;
 inline ValuePtr PyAttrValue(const py::object& obj) {
  ValuePtr converted_ret = nullptr;
  bool converted = parse::ConvertData(obj, &converted_ret);
@ -48,32 +50,12 @@ inline ValuePtr PyAttrValue(const py::object& obj) {
  return converted_ret;
 }

-MeTensorPtr ConvertPyObjToTensor(const py::object& obj) {
-  MeTensorPtr me_tensor_ptr = nullptr;
-  if (py::isinstance<MeTensor>(obj)) {
-    me_tensor_ptr = py::cast<MeTensorPtr>(obj);
-  } else if (py::isinstance<py::tuple>(obj)) {
-    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::tuple>(obj), nullptr);
-  } else if (py::isinstance<py::float_>(obj)) {
-    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::float_>(obj), nullptr);
-  } else if (py::isinstance<py::int_>(obj)) {
-    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::int_>(obj), nullptr);
-  } else if (py::isinstance<py::list>(obj)) {
-    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::list>(obj), nullptr);
-  } else if (py::isinstance<py::array>(obj)) {
-    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::array>(obj), nullptr);
-  } else {
-    MS_LOG(EXCEPTION) << "run op inputs type is invalid!";
-  }
-  return me_tensor_ptr;
-}
-
 void PynativeInfer(const PrimitivePyPtr& prim, const py::tuple& py_args, OpExecInfo* const op_exec_info) {
  size_t size = py_args.size();
  AbstractBasePtrList args_spec_list;
  for (size_t i = 0; i < size; i++) {
    ValuePtr input_value = PyAttrValue(py_args[i]);
-    if (py::isinstance<MeTensor>(py_args[i])) {
+    if (py::isinstance<tensor::Tensor>(py_args[i])) {
      args_spec_list.emplace_back(abstract::FromValueInside(input_value, true));
    } else {
      args_spec_list.emplace_back(abstract::FromValueInside(input_value, false));
@ -140,241 +122,6 @@ std::string GetSingleOpGraphInfo(const OpExecInfoPtr& op_exec_info) {
  return graph_info;
 }

-bool SetInputsForSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs,
-                               const OperatorPtr& op, std::vector<GeOperator>* graph_input_nodes) {
-  MS_EXCEPTION_IF_NULL(op_exec_info);
-  MS_EXCEPTION_IF_NULL(graph_input_nodes);
-  auto op_inputs = op_exec_info->op_inputs;
-  std::string op_name = op_exec_info->op_name;
-  transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true);
-  if (adapter == nullptr) {
-    return false;
-  }
-
-  int op_input_idx = 1;
-  size_t size = inputs.size();
-  for (size_t i = 0; i < size; i++) {
-    if (inputs[i] == nullptr) {
-      continue;
-    }
-    auto const_op = std::make_shared<transform::Constant>();
-    MS_EXCEPTION_IF_NULL(const_op);
-    (void)const_op->set_attr_value(*inputs[i]);
-    MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]);
-    MS_EXCEPTION_IF_NULL(me_tensor_ptr);
-    auto const_op_desc =
-      transform::TransformUtil::GetGeTensorDesc(me_tensor_ptr->shape_c(), me_tensor_ptr->data_type(), kOpFormat_NCHW);
-    if (const_op_desc == nullptr) {
-      MS_LOG(ERROR) << "Create variable " << op_name << " ouptut descriptor failed!";
-      return false;
-    }
-    auto pointer_cast_const_op = std::static_pointer_cast<transform::Constant>(const_op);
-    MS_EXCEPTION_IF_NULL(pointer_cast_const_op);
-    (void)pointer_cast_const_op->update_output_desc_y(*const_op_desc);
-    auto& input_map = adapter->getInputMap();
-    if (input_map.find(op_input_idx) == input_map.end()) {
-      continue;
-    }
-    if (adapter->setInput(op, op_input_idx++, const_op)) {
-      MS_LOG(ERROR) << "fail to set params, index is " << op_input_idx;
-      return false;
-    }
-    graph_input_nodes->push_back(*const_op);
-  }
-  return true;
-}
-
-bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs,
-                        const std::unordered_map<std::string, ValuePtr>& attrs, const GeGraphPtr& graph) {
-  MS_EXCEPTION_IF_NULL(op_exec_info);
-  std::string op_name = op_exec_info->op_name;
-  auto op_inputs = op_exec_info->op_inputs;
-  transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true);
-  if (adapter == nullptr) {
-    MS_LOG(ERROR) << "Unable to find Adapter for " << ((std::string)py::str(op_name));
-    return false;
-  }
-  OperatorPtr op = adapter->generate(op_name);
-  MS_EXCEPTION_IF_NULL(op);
-
-  std::vector<GeOperator> graph_input_nodes;
-  // hold param nodes after setting input and output for the graph
-  // set input
-  if (!SetInputsForSingleOpGraph(op_exec_info, inputs, op, &graph_input_nodes)) {
-    return false;
-  }
-  // set attributes
-  for (auto attr : attrs) {
-    (void)adapter->setAttr(op, attr.first, attr.second);
-  }
-  // set default attributes
-  auto extra_attrs = adapter->GetExtraAttr();
-  for (auto attr : extra_attrs) {
-    (void)adapter->setAttr(op, attr.first, attr.second);
-  }
-  // set input attributes
-  auto& input_attr_map = adapter->getInputAttrMap();
-  for (auto& it : input_attr_map) {
-    if (op_inputs.size() < it.first) {
-      continue;
-    }
-    auto const_value = PyAttrValue(op_inputs[it.first - 1]);
-    if (const_value->isa<None>()) {
-      continue;
-    }
-    it.second.set_attr(op, const_value);
-  }
-  // construct output data nodes
-  std::vector<GeOperator> graph_outputs{*op};
-  // set input and output nodes for the graph
-  MS_EXCEPTION_IF_NULL(graph);
-  (void)graph->SetInputs(graph_input_nodes).SetOutputs(graph_outputs);
-  MS_LOG(INFO) << "BuildSingleOpGraph done";
-  return true;
-}
-
-void ToTensorPtr(const OpExecInfoPtr op_exec_info, std::vector<GeTensorPtr>* const inputs) {
-  MS_EXCEPTION_IF_NULL(inputs);
-  MS_EXCEPTION_IF_NULL(op_exec_info);
-  auto op_inputs = op_exec_info->op_inputs;
-  size_t size = op_inputs.size();
-  for (size_t i = 0; i < size; i++) {
-    if (py::isinstance<py::none>(op_inputs[i])) {
-      inputs->emplace_back(nullptr);
-      continue;
-    }
-    MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]);
-    auto ge_tensor_ptr = transform::TransformUtil::ConvertTensor(me_tensor_ptr, kOpFormat_NCHW);
-    if (ge_tensor_ptr == nullptr) {
-      MS_LOG(EXCEPTION) << "convert inputs to GE tensor failed in op " << op_exec_info->op_name << ".";
-    }
-    // set inputs for operator to build single node graph
-    inputs->push_back(ge_tensor_ptr);
-  }
-}
-
-PynativeStatusCode ConvertAttributes(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs) {
-  MS_EXCEPTION_IF_NULL(op_exec_info);
-  auto op_attrs = op_exec_info->op_attrs;
-  std::unordered_map<std::string, ValuePtr> attrs{};
-
-  for (auto& item : op_attrs) {
-    if (!py::isinstance<py::str>(item.first)) {
-      MS_LOG(ERROR) << "type error in py dict convert";
-      return PYNATIVE_OP_ATTRS_ERR;
-    }
-    std::string name = py::cast<std::string>(item.first);
-    auto attr_value = PyAttrValue(py::cast<py::object>(item.second));
-    (void)attrs.emplace(name, attr_value);
-  }
-
-  // build graph
-  GeGraphPtr graph = std::make_shared<GeGraph>(op_exec_info->op_name);
-  if (BuildSingleOpGraph(op_exec_info, inputs, attrs, graph) == false) {
-    MS_LOG(ERROR) << "Fail to BuildSingleOpGraph";
-    return PYNATIVE_GRAPH_GE_BUILD_ERR;
-  }
-
-  // add the single op graph into the graph manager, which will be iterated by session.
-  transform::Status ret =
-    transform::DfGraphManager::GetInstance().AddGraph(SINGLE_OP_GRAPH, std::shared_ptr<transform::DfGraph>(graph));
-  if (ret != transform::SUCCESS) {
-    MS_LOG(ERROR) << "Fail to AddGraph into graph manager";
-    return PYNATIVE_GRAPH_MANAGER_ERR;
-  }
-
-  return PYNATIVE_SUCCESS;
-}
-
-std::vector<MeTensorPtr> ConvertOutputTensors(const OpExecInfoPtr& op_exec_info,
-                                              const std::vector<GeTensorPtr>& ge_tensors) {
-  std::vector<MeTensorPtr> outputs;
-  AbstractBasePtr abs_base = op_exec_info->abstract;
-  std::vector<std::vector<int>> shapes;
-  if (abs_base != nullptr && abs_base->isa<abstract::AbstractTensor>()) {
-    auto arg_tensor = dyn_cast<abstract::AbstractTensor>(abs_base);
-    shapes.emplace_back(arg_tensor->shape()->shape());
-    outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes);
-    return outputs;
-  }
-  if (abs_base != nullptr && abs_base->isa<abstract::AbstractTuple>()) {
-    auto arg_tuple = dyn_cast<abstract::AbstractTuple>(abs_base);
-    size_t len = arg_tuple->size();
-
-    for (size_t i = 0; i < len; i++) {
-      if (arg_tuple->elements()[i]->isa<abstract::AbstractTensor>()) {
-        auto arg_tensor = dyn_cast<abstract::AbstractTensor>(arg_tuple->elements()[i]);
-        shapes.emplace_back(arg_tensor->shape()->shape());
-      }
-    }
-    outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes);
-    return outputs;
-  }
-  for (auto& it : ge_tensors) {
-    auto tensor = transform::TransformUtil::ConvertGeTensor(it);
-    if (tensor != nullptr) {
-      outputs.emplace_back(tensor);
-    }
-  }
-  return outputs;
-}
-
-py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status) {
-  MS_LOG(INFO) << "RunOpInGe start";
-  MS_EXCEPTION_IF_NULL(op_exec_info);
-  MS_EXCEPTION_IF_NULL(status);
-
-  // returns a null py::tuple on error
-  py::tuple err_ret(0);
-  auto op_name = op_exec_info->op_name;
-  transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true);
-  if (adapter == nullptr) {
-    MS_LOG(ERROR) << "Unable to find GE Adapter for " << ((std::string)py::str(op_name));
-    *status = PYNATIVE_OP_NOT_IMPLEMENTED_ERR;
-    return std::move(err_ret);
-  }
-
-  std::vector<GeTensorPtr> inputs{};
-  ToTensorPtr(op_exec_info, &inputs);
-  // convert me attr to ge AttrValue
-  PynativeStatusCode ret = ConvertAttributes(op_exec_info, inputs);
-  if (ret != PYNATIVE_SUCCESS) {
-    *status = ret;
-    return std::move(err_ret);
-  }
-  // run graph
-  transform::RunOptions run_options;
-  run_options.name = SINGLE_OP_GRAPH;
-  std::vector<GeTensorPtr> ge_inputs;
-  std::vector<GeTensorPtr> ge_outputs;
-  transform::GraphRunnerOptions graph_runner_options;
-  graph_runner_options.options["ge.trainFlag"] = "1";
-  auto graph_runner = std::make_shared<transform::GraphRunner>(graph_runner_options);
-  transform::Status run_ret;
-  {
-    // Release GIL before calling into (potentially long-running) C++ code
-    py::gil_scoped_release release;
-    run_ret = graph_runner->RunGraph(run_options, ge_inputs, &ge_outputs);
-  }
-  if (run_ret != transform::Status::SUCCESS) {
-    MS_LOG(ERROR) << "GraphRunner Fails to Run Graph";
-    *status = PYNATIVE_GRAPH_GE_RUN_ERR;
-    return std::move(err_ret);
-  }
-
-  std::vector<MeTensorPtr> graph_outputs = ConvertOutputTensors(op_exec_info, ge_outputs);
-  size_t output_size = graph_outputs.size();
-  py::tuple result(output_size);
-  for (size_t i = 0; i < output_size; i++) {
-    MS_EXCEPTION_IF_NULL(graph_outputs[i]);
-    result[i] = *graph_outputs[i];
-  }
-
-  *status = PYNATIVE_SUCCESS;
-  MS_LOG(INFO) << "RunOpInGe end";
-  return std::move(result);
-}
-
 py::object RunOpInVM(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status) {
  MS_LOG(INFO) << "RunOpInVM start";

@ -423,12 +170,6 @@ py::object RunOpWithBackendPolicy(MsBackendPolicy backend_policy, const OpExecIn
  MS_EXCEPTION_IF_NULL(status);
  py::object result;
  switch (backend_policy) {
-    case kMsBackendGeOnly: {
-      // use GE only
-      MS_LOG(INFO) << "RunOp use GE only backend";
-      result = RunOpInGE(op_exec_info, status);
-      break;
-    }
    case kMsBackendVmOnly: {
      // use vm only
      MS_LOG(INFO) << "RunOp use VM only backend";
@ -436,22 +177,14 @@ py::object RunOpWithBackendPolicy(MsBackendPolicy backend_policy, const OpExecIn
      break;
    }
    case kMsBackendGePrior: {
+#ifdef ENABLE_GE
      // use GE first, use vm when GE fails
      MS_LOG(INFO) << "RunOp use GE first backend";
      result = RunOpInGE(op_exec_info, status);
      if (*status != PYNATIVE_SUCCESS) {
        result = RunOpInVM(op_exec_info, status);
      }
-      break;
-    }
-    case kMsBackendVmPrior: {
-      // GE_VM_SILENT
-      // (should not use this policy) use vm first, use GE when vm fails
-      MS_LOG(INFO) << "RunOp use VM first backend";
-      result = RunOpInVM(op_exec_info, status);
-      if (*status != PYNATIVE_SUCCESS) {
-        result = RunOpInGE(op_exec_info, status);
-      }
+#endif
      break;
    }
    case kMsBackendMsPrior: {
--- a/mindspore/ccsrc/pynative/pynative_execute.h
+++ b/mindspore/ccsrc/pynative/pynative_execute.h
@ -25,55 +25,14 @@

 #include "pybind11/pybind11.h"

-#include "transform/convert.h"
-#include "transform/graph_runner.h"
-#include "transform/types.h"
+#include "pynative/base.h"
 #include "utils/context/ms_context.h"

 namespace mindspore {
 namespace pynative {

-using MeTensor = mindspore::tensor::Tensor;
-using MeTensorPtr = mindspore::tensor::TensorPtr;
-using GeTensor = ge::Tensor;
-using GeTensorPtr = std::shared_ptr<GeTensor>;
-using GeGraph = ge::Graph;
-using GeGraphPtr = std::shared_ptr<GeGraph>;
-using GeOperator = ge::Operator;
-using GeOperatorPtr = std::shared_ptr<GeOperator>;
-
 namespace py = pybind11;

-enum PynativeStatusCode {
-  PYNATIVE_SUCCESS = 0,
-  PYNATIVE_OP_NOT_IMPLEMENTED_ERR = 1,
-  PYNATIVE_OP_INPUTS_ERR = 2,
-  PYNATIVE_OP_PARAMS_ERR = 3,
-  PYNATIVE_OP_ATTRS_ERR = 4,
-  PYNATIVE_GRAPH_MANAGER_ERR = 5,
-  PYNATIVE_GRAPH_GE_BUILD_ERR = 6,
-  PYNATIVE_GRAPH_GE_RUN_ERR = 7,
-  PYNATIVE_UNKNOWN_STATE = 0XFF
-};
-
-enum RunOpArgsEnum { PY_PRIM = 0, PY_NAME, PY_INPUTS, PY_INPUT_MASK, PY_ARGS_NUM };
-
-struct OpExecInfo {
-  PrimitivePyPtr py_primitive;
-  std::string op_name;
-  AbstractBasePtr abstract;
-
-  py::tuple op_inputs;
-  py::tuple inputs_mask;
-  py::dict op_attrs;
-};
-using OpExecInfoPtr = std::shared_ptr<OpExecInfo>;
-OpExecInfoPtr GenerateOpExecInfo(const py::args& args);
-bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs,
-                        const std::unordered_map<std::string, ValuePtr>& attrs, const GeGraphPtr& graph);
-
-py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status);
-
 py::object RunOpInVM(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status);

 py::tuple RunOp(const py::args& args);
--- a/mindspore/ccsrc/pynative/pynative_execute_ge.cc
+++ b/mindspore/ccsrc/pynative/pynative_execute_ge.cc
@ -0,0 +1,311 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pynative/pynative_execute_ge.h"
+
+#include <typeinfo>
+#include <map>
+#include <set>
+#include <unordered_set>
+
+#include "utils/any.h"
+#include "utils/utils.h"
+#include "utils/context/ms_context.h"
+#include "operator/ops.h"
+#include "pipeline/parse/data_converter.h"
+#include "pipeline/static_analysis/prim.h"
+#include "session/session_factory.h"
+
+const char SINGLE_OP_GRAPH[] = "single_op_graph";
+
+namespace mindspore {
+namespace pynative {
+
+using MeTensor = mindspore::tensor::Tensor;
+using MeTensorPtr = mindspore::tensor::TensorPtr;
+using GeOperator = ge::Operator;
+using GeOperatorPtr = std::shared_ptr<GeOperator>;
+
+using transform::GraphRunner;
+using transform::GraphRunnerOptions;
+using transform::OperatorPtr;
+static std::shared_ptr<session::SessionBasic> session = nullptr;
+inline ValuePtr PyAttrValue(const py::object& obj) {
+  ValuePtr converted_ret = nullptr;
+  bool converted = parse::ConvertData(obj, &converted_ret);
+  if (!converted) {
+    MS_LOG(EXCEPTION) << "attribute convert error with type:" << std::string(py::str(obj));
+  }
+  return converted_ret;
+}
+
+MeTensorPtr ConvertPyObjToTensor(const py::object& obj) {
+  MeTensorPtr me_tensor_ptr = nullptr;
+  if (py::isinstance<MeTensor>(obj)) {
+    me_tensor_ptr = py::cast<MeTensorPtr>(obj);
+  } else if (py::isinstance<py::tuple>(obj)) {
+    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::tuple>(obj), nullptr);
+  } else if (py::isinstance<py::float_>(obj)) {
+    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::float_>(obj), nullptr);
+  } else if (py::isinstance<py::int_>(obj)) {
+    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::int_>(obj), nullptr);
+  } else if (py::isinstance<py::list>(obj)) {
+    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::list>(obj), nullptr);
+  } else if (py::isinstance<py::array>(obj)) {
+    me_tensor_ptr = std::make_shared<MeTensor>(py::cast<py::array>(obj), nullptr);
+  } else {
+    MS_LOG(EXCEPTION) << "run op inputs type is invalid!";
+  }
+  return me_tensor_ptr;
+}
+
+bool SetInputsForSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs,
+                               const OperatorPtr& op, std::vector<GeOperator>* graph_input_nodes) {
+  MS_EXCEPTION_IF_NULL(op_exec_info);
+  MS_EXCEPTION_IF_NULL(graph_input_nodes);
+  auto op_inputs = op_exec_info->op_inputs;
+  std::string op_name = op_exec_info->op_name;
+  transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true);
+  if (adapter == nullptr) {
+    return false;
+  }
+
+  int op_input_idx = 1;
+  size_t size = inputs.size();
+  for (size_t i = 0; i < size; i++) {
+    if (inputs[i] == nullptr) {
+      continue;
+    }
+    auto const_op = std::make_shared<transform::Constant>();
+    MS_EXCEPTION_IF_NULL(const_op);
+    (void)const_op->set_attr_value(*inputs[i]);
+    MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]);
+    MS_EXCEPTION_IF_NULL(me_tensor_ptr);
+    auto const_op_desc =
+      transform::TransformUtil::GetGeTensorDesc(me_tensor_ptr->shape_c(), me_tensor_ptr->data_type(), kOpFormat_NCHW);
+    if (const_op_desc == nullptr) {
+      MS_LOG(ERROR) << "Create variable " << op_name << " ouptut descriptor failed!";
+      return false;
+    }
+    auto pointer_cast_const_op = std::static_pointer_cast<transform::Constant>(const_op);
+    MS_EXCEPTION_IF_NULL(pointer_cast_const_op);
+    (void)pointer_cast_const_op->update_output_desc_y(*const_op_desc);
+    auto& input_map = adapter->getInputMap();
+    if (input_map.find(op_input_idx) == input_map.end()) {
+      continue;
+    }
+    if (adapter->setInput(op, op_input_idx++, const_op)) {
+      MS_LOG(ERROR) << "fail to set params, index is " << op_input_idx;
+      return false;
+    }
+    graph_input_nodes->push_back(*const_op);
+  }
+  return true;
+}
+
+bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs,
+                        const std::unordered_map<std::string, ValuePtr>& attrs, const GeGraphPtr& graph) {
+  MS_EXCEPTION_IF_NULL(op_exec_info);
+  std::string op_name = op_exec_info->op_name;
+  auto op_inputs = op_exec_info->op_inputs;
+  transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true);
+  if (adapter == nullptr) {
+    MS_LOG(ERROR) << "Unable to find Adapter for " << ((std::string)py::str(op_name));
+    return false;
+  }
+  OperatorPtr op = adapter->generate(op_name);
+  MS_EXCEPTION_IF_NULL(op);
+
+  std::vector<GeOperator> graph_input_nodes;
+  // hold param nodes after setting input and output for the graph
+  // set input
+  if (!SetInputsForSingleOpGraph(op_exec_info, inputs, op, &graph_input_nodes)) {
+    return false;
+  }
+  // set attributes
+  for (auto attr : attrs) {
+    (void)adapter->setAttr(op, attr.first, attr.second);
+  }
+  // set default attributes
+  auto extra_attrs = adapter->GetExtraAttr();
+  for (auto attr : extra_attrs) {
+    (void)adapter->setAttr(op, attr.first, attr.second);
+  }
+  // set input attributes
+  auto& input_attr_map = adapter->getInputAttrMap();
+  for (auto& it : input_attr_map) {
+    if (op_inputs.size() < it.first) {
+      continue;
+    }
+    auto const_value = PyAttrValue(op_inputs[it.first - 1]);
+    if (const_value->isa<None>()) {
+      continue;
+    }
+    it.second.set_attr(op, const_value);
+  }
+  // construct output data nodes
+  std::vector<GeOperator> graph_outputs{*op};
+  // set input and output nodes for the graph
+  MS_EXCEPTION_IF_NULL(graph);
+  (void)graph->SetInputs(graph_input_nodes).SetOutputs(graph_outputs);
+  MS_LOG(INFO) << "BuildSingleOpGraph done";
+  return true;
+}
+
+void ToTensorPtr(const OpExecInfoPtr op_exec_info, std::vector<GeTensorPtr>* const inputs) {
+  MS_EXCEPTION_IF_NULL(inputs);
+  MS_EXCEPTION_IF_NULL(op_exec_info);
+  auto op_inputs = op_exec_info->op_inputs;
+  size_t size = op_inputs.size();
+  for (size_t i = 0; i < size; i++) {
+    if (py::isinstance<py::none>(op_inputs[i])) {
+      inputs->emplace_back(nullptr);
+      continue;
+    }
+    MeTensorPtr me_tensor_ptr = ConvertPyObjToTensor(op_inputs[i]);
+    auto ge_tensor_ptr = transform::TransformUtil::ConvertTensor(me_tensor_ptr, kOpFormat_NCHW);
+    if (ge_tensor_ptr == nullptr) {
+      MS_LOG(EXCEPTION) << "convert inputs to GE tensor failed in op " << op_exec_info->op_name << ".";
+    }
+    // set inputs for operator to build single node graph
+    inputs->push_back(ge_tensor_ptr);
+  }
+}
+
+PynativeStatusCode ConvertAttributes(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs) {
+  MS_EXCEPTION_IF_NULL(op_exec_info);
+  auto op_attrs = op_exec_info->op_attrs;
+  std::unordered_map<std::string, ValuePtr> attrs{};
+
+  for (auto& item : op_attrs) {
+    if (!py::isinstance<py::str>(item.first)) {
+      MS_LOG(ERROR) << "type error in py dict convert";
+      return PYNATIVE_OP_ATTRS_ERR;
+    }
+    std::string name = py::cast<std::string>(item.first);
+    auto attr_value = PyAttrValue(py::cast<py::object>(item.second));
+    (void)attrs.emplace(name, attr_value);
+  }
+
+  // build graph
+  GeGraphPtr graph = std::make_shared<GeGraph>(op_exec_info->op_name);
+  if (BuildSingleOpGraph(op_exec_info, inputs, attrs, graph) == false) {
+    MS_LOG(ERROR) << "Fail to BuildSingleOpGraph";
+    return PYNATIVE_GRAPH_GE_BUILD_ERR;
+  }
+
+  // add the single op graph into the graph manager, which will be iterated by session.
+  transform::Status ret =
+    transform::DfGraphManager::GetInstance().AddGraph(SINGLE_OP_GRAPH, std::shared_ptr<transform::DfGraph>(graph));
+  if (ret != transform::SUCCESS) {
+    MS_LOG(ERROR) << "Fail to AddGraph into graph manager";
+    return PYNATIVE_GRAPH_MANAGER_ERR;
+  }
+
+  return PYNATIVE_SUCCESS;
+}
+
+std::vector<MeTensorPtr> ConvertOutputTensors(const OpExecInfoPtr& op_exec_info,
+                                              const std::vector<GeTensorPtr>& ge_tensors) {
+  std::vector<MeTensorPtr> outputs;
+  AbstractBasePtr abs_base = op_exec_info->abstract;
+  std::vector<std::vector<int>> shapes;
+  if (abs_base != nullptr && abs_base->isa<abstract::AbstractTensor>()) {
+    auto arg_tensor = dyn_cast<abstract::AbstractTensor>(abs_base);
+    shapes.emplace_back(arg_tensor->shape()->shape());
+    outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes);
+    return outputs;
+  }
+  if (abs_base != nullptr && abs_base->isa<abstract::AbstractTuple>()) {
+    auto arg_tuple = dyn_cast<abstract::AbstractTuple>(abs_base);
+    size_t len = arg_tuple->size();
+
+    for (size_t i = 0; i < len; i++) {
+      if (arg_tuple->elements()[i]->isa<abstract::AbstractTensor>()) {
+        auto arg_tensor = dyn_cast<abstract::AbstractTensor>(arg_tuple->elements()[i]);
+        shapes.emplace_back(arg_tensor->shape()->shape());
+      }
+    }
+    outputs = transform::TransformUtil::ConvertGeTensors(ge_tensors, shapes);
+    return outputs;
+  }
+  for (auto& it : ge_tensors) {
+    auto tensor = transform::TransformUtil::ConvertGeTensor(it);
+    if (tensor != nullptr) {
+      outputs.emplace_back(tensor);
+    }
+  }
+  return outputs;
+}
+
+py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status) {
+  MS_LOG(INFO) << "RunOpInGe start";
+  MS_EXCEPTION_IF_NULL(op_exec_info);
+  MS_EXCEPTION_IF_NULL(status);
+
+  // returns a null py::tuple on error
+  py::tuple err_ret(0);
+  auto op_name = op_exec_info->op_name;
+  transform::OpAdapterPtr adapter = transform::DfGraphConvertor::FindAdapter(op_name, true);
+  if (adapter == nullptr) {
+    MS_LOG(ERROR) << "Unable to find GE Adapter for " << ((std::string)py::str(op_name));
+    *status = PYNATIVE_OP_NOT_IMPLEMENTED_ERR;
+    return std::move(err_ret);
+  }
+
+  std::vector<GeTensorPtr> inputs{};
+  ToTensorPtr(op_exec_info, &inputs);
+  // convert me attr to ge AttrValue
+  PynativeStatusCode ret = ConvertAttributes(op_exec_info, inputs);
+  if (ret != PYNATIVE_SUCCESS) {
+    *status = ret;
+    return std::move(err_ret);
+  }
+  // run graph
+  transform::RunOptions run_options;
+  run_options.name = SINGLE_OP_GRAPH;
+  std::vector<GeTensorPtr> ge_inputs;
+  std::vector<GeTensorPtr> ge_outputs;
+  transform::GraphRunnerOptions graph_runner_options;
+  graph_runner_options.options["ge.trainFlag"] = "1";
+  auto graph_runner = std::make_shared<transform::GraphRunner>(graph_runner_options);
+  transform::Status run_ret;
+  {
+    // Release GIL before calling into (potentially long-running) C++ code
+    py::gil_scoped_release release;
+    run_ret = graph_runner->RunGraph(run_options, ge_inputs, &ge_outputs);
+  }
+  if (run_ret != transform::Status::SUCCESS) {
+    MS_LOG(ERROR) << "GraphRunner Fails to Run Graph";
+    *status = PYNATIVE_GRAPH_GE_RUN_ERR;
+    return std::move(err_ret);
+  }
+
+  std::vector<MeTensorPtr> graph_outputs = ConvertOutputTensors(op_exec_info, ge_outputs);
+  size_t output_size = graph_outputs.size();
+  py::tuple result(output_size);
+  for (size_t i = 0; i < output_size; i++) {
+    MS_EXCEPTION_IF_NULL(graph_outputs[i]);
+    result[i] = *graph_outputs[i];
+  }
+
+  *status = PYNATIVE_SUCCESS;
+  MS_LOG(INFO) << "RunOpInGe end";
+  return std::move(result);
+}
+}  // namespace pynative
+
+}  // namespace mindspore
--- a/mindspore/ccsrc/pynative/pynative_execute_ge.h
+++ b/mindspore/ccsrc/pynative/pynative_execute_ge.h
@ -0,0 +1,46 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PYNATIVE_PYNATIVE_EXECUTE_GE_H_
+#define MINDSPORE_CCSRC_PYNATIVE_PYNATIVE_EXECUTE_GE_H_
+
+#include <vector>
+#include <utility>
+#include <string>
+#include <memory>
+#include <unordered_map>
+
+#include "pynative/base.h"
+#include "transform/convert.h"
+#include "transform/graph_runner.h"
+#include "transform/types.h"
+#include "utils/context/ms_context.h"
+
+using GeTensor = ge::Tensor;
+using GeTensorPtr = std::shared_ptr<GeTensor>;
+using GeGraph = ge::Graph;
+using GeGraphPtr = std::shared_ptr<GeGraph>;
+
+namespace mindspore {
+namespace pynative {
+bool BuildSingleOpGraph(const OpExecInfoPtr& op_exec_info, const std::vector<GeTensorPtr>& inputs,
+                        const std::unordered_map<std::string, ValuePtr>& attrs, const GeGraphPtr& graph);
+
+py::object RunOpInGE(const OpExecInfoPtr& op_exec_info, PynativeStatusCode* status);
+}  // namespace pynative
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PYNATIVE_PYNATIVE_EXECUTE_GE_H_
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@ -35,6 +35,7 @@
 #include "pre_activate/common/helper.h"
 #include "device/kernel_runtime_manager.h"
 #include "kernel/tbe/tbe_python_funcs.h"
+#include "utils/config_manager.h"

 namespace mindspore {
 namespace session {
--- a/mindspore/ccsrc/session/gpu_session.cc
+++ b/mindspore/ccsrc/session/gpu_session.cc
@ -19,7 +19,7 @@
 #include "device/gpu/gpu_kernel_runtime.h"
 #include "pre_activate/common/optimizer.h"
 #include "pre_activate/common/pass_manager.h"
-#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h"
+#include "pre_activate/common/ir_fusion/allreduce_fusion.h"
 #include "device/kernel_runtime_manager.h"
 #include "predict/predict.h"
 #include "common/utils.h"
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@ -373,24 +373,6 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
 }

 // ---------------implement of DfGraphConvertor-------------
-std::string GetCNodeFuncName(const CNodePtr cnode) {
-  if (cnode->inputs().empty()) {
-    return "";
-  }
-
-  AnfNodePtr valuenode = cnode->input(0);
-  if (valuenode->isa<ValueNode>()) {
-    auto value = GetValueNode(valuenode);
-    // check whether the valuenode is primitive
-    if (value->isa<Primitive>()) {
-      return value->cast<PrimitivePtr>()->name();
-    } else {
-      return value->ToString();
-    }
-  }
-  return "";
-}
-
 PrimType GetCNodeFuncType(const CNodePtr cnode) {
  if (cnode->inputs().empty()) {
    return kPrimTypeUnknown;
--- a/mindspore/ccsrc/transform/convert.h
+++ b/mindspore/ccsrc/transform/convert.h
@ -253,7 +253,6 @@ class DfGraphConvertor {
  bool distribute_ = false;
 };

-extern std::string GetCNodeFuncName(CNodePtr cnode);
 }  // namespace transform
 }  // namespace mindspore

--- a/mindspore/ccsrc/utils/callbacks.cc
+++ b/mindspore/ccsrc/utils/callbacks.cc
@ -20,16 +20,16 @@
 #include <memory>
 #include <vector>
 #include "pybind11/pybind11.h"
+#ifdef ENABLE_GE
 #include "transform/df_graph_manager.h"
 #include "transform/util.h"
+#endif
 #include "pipeline/parse/data_converter.h"
 #include "pipeline/parse/python_adapter.h"
 #include "utils/visible.h"

 namespace mindspore {
 namespace callbacks {
-using mindspore::transform::Status;
-using mindspore::transform::TransformUtil;

 const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback";
 const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op";
@ -38,6 +38,10 @@ const char kSummary[] = "Summary";
 const char kCheckPoint[] = "Save";
 const int ONE_SHAPE = 1;

+#ifdef ENABLE_GE
+using mindspore::transform::Status;
+using mindspore::transform::TransformUtil;
+
 bool GetParameterShape(const FuncGraphPtr& graph, const std::string& param_name,
                       const std::shared_ptr<std::vector<int>>& shape) {
  if (graph == nullptr) {
@ -181,6 +185,7 @@ uint32_t MS_EXPORT SummarySaveCallback(uint32_t graph_id, const std::map<std::st
  MS_LOG(DEBUG) << "End the summary save callback function.";
  return Status::SUCCESS;
 }
+#endif

 // Cache the summary callback data from ME session
 // Remove the GE module on new architecture
@ -208,10 +213,10 @@ uint32_t MS_EXPORT SummarySaveCallback(uint32_t graph_id, const std::map<std::st
  auto bool_ret = py::cast<bool>(ret);
  if (!bool_ret) {
    MS_LOG(ERROR) << "Python checkpoint return false during callback";
-    return Status::FAILED;
+    return kCallbackFalied;
  }
  MS_LOG(DEBUG) << "End the summary save callback function.";
-  return Status::SUCCESS;
+  return kCallbackOk;
 }
 }  // namespace callbacks
 }  // namespace mindspore
--- a/mindspore/ccsrc/utils/callbacks.h
+++ b/mindspore/ccsrc/utils/callbacks.h
@ -20,8 +20,11 @@
 #include <string>
 #include <vector>
 #include <memory>
+#include "ir/meta_tensor.h"
+#ifdef ENABLE_GE
 #include "transform/types.h"
 #include "transform/util.h"
+#endif

 namespace mindspore {
 namespace callbacks {
@ -36,10 +39,16 @@ extern const char kSummary[];
 extern const char kCheckPoint[];
 extern const std::string kPythonCheckpointModuleName;
 extern const std::string kPythonCheckpointFuncName;
+
+const int kCallbackOk = 0;
+const int kCallbackFalied = 1;
+
 bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name,
                       const std::shared_ptr<std::vector<int>>& shape);
+#ifdef ENABLE_GE
 uint32_t CheckpointSaveCallback(uint32_t, const std::map<std::string, ge::Tensor>&);
 uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, ge::Tensor>&);
+#endif
 uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, TensorPtr>&);

 }  // namespace callbacks
--- a/mindspore/ccsrc/utils/context/ms_context.cc
+++ b/mindspore/ccsrc/utils/context/ms_context.cc
@ -26,13 +26,15 @@
 #include "tdt/tdt_host_interface.h"
 #include "tdt/data_common.h"
 #endif
+#ifdef ENABLE_GE
 #include "transform/df_graph_manager.h"
+#endif
 #include "ir/meta_tensor.h"

 namespace mindspore {
+#ifdef ENABLE_GE
 using mindspore::transform::DfGraphManager;
-using transform::GraphRunner;
-using transform::GraphRunnerOptions;
+#endif

 std::atomic<bool> thread_1_must_end(false);

@ -81,6 +83,7 @@ MsContext::MsContext(const std::string& policy, const std::string& target) {

 std::shared_ptr<MsContext> MsContext::GetInstance() {
  if (inst_context_ == nullptr) {
+    MS_LOG(DEBUG) << "Create new mindspore context";
 #ifdef ENABLE_GE
    inst_context_.reset(new (std::nothrow) MsContext("ge", kAscendDevice));
 #elif defined(ENABLE_D)
--- a/mindspore/ccsrc/utils/context/ms_context.h
+++ b/mindspore/ccsrc/utils/context/ms_context.h
@ -23,7 +23,6 @@
 #include <vector>
 #include <string>
 #include <utility>
-#include "transform/graph_runner.h"
 #include "utils/log_adapter.h"

 namespace mindspore {
--- a/mindspore/ccsrc/utils/convert_utils.cc
+++ b/mindspore/ccsrc/utils/convert_utils.cc
@ -373,4 +373,45 @@ AbstractBasePtr PyListDtype2AbstractTensor(const py::object &shape_obj, const py
    MS_LOG(EXCEPTION) << "Python evaluator return invalid shape or type. " << (std::string)py::str(type_obj);
  }
 }
+bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple &args,
+                                       const std::shared_ptr<py::object> &ret_val) {
+  if (output->isa<ValueNode>()) {
+    MS_LOG(INFO) << "Graph's output is a constant. No need to execute.";
+    ValuePtr value = GetValueNode(output);
+    *ret_val = ValuePtrToPyData(value);
+    return true;
+  }
+
+  // Adapter will transform values in __init__() and construct() to parameters, this could cause
+  // inputs (a.k.a args in current function) size less than parameters'.
+  if (output->isa<Parameter>()) {
+    MS_LOG(INFO) << "Graph's output is a parameter. If all params are inputs, no need to execute.";
+    if (args.empty()) {
+      MS_LOG(EXCEPTION) << "Inputs size is 0, let graph to be executed.";
+    }
+    // Find the right parameter as ret_val.
+    auto func_graph = output->func_graph();
+    MS_EXCEPTION_IF_NULL(func_graph);
+    auto params = func_graph->parameters();
+    if (params.empty()) {
+      MS_EXCEPTION(UnknownError) << "Graph's parameters size is 0";
+    }
+    if (args.size() != params.size()) {
+      MS_LOG(EXCEPTION) << "Input size " << args.size() << " not equal to params size " << params.size()
+                        << ", let graph to be executed.";
+    }
+
+    auto it = std::find(params.begin(), params.end(), output);
+    if (it == params.end()) {
+      MS_EXCEPTION(UnknownError) << "When graph output is Parameter,  it should be found in graph parameters";
+    }
+    size_t index = it - params.cbegin();
+    if (index >= args.size()) {
+      MS_EXCEPTION(UnknownError) << "Index " << index << " equal or larger than args size " << args.size() << ".";
+    }
+    *ret_val = args[index];
+    return true;
+  }
+  return false;
+}
 }  // namespace mindspore
--- a/mindspore/ccsrc/utils/convert_utils.h
+++ b/mindspore/ccsrc/utils/convert_utils.h
@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_UTILS_CONVERT_UTILS_H_

 #include <limits>
+#include <memory>
 #include "pybind11/pybind11.h"

 #include "utils/any.h"
@ -120,6 +121,9 @@ inline uint8_t *AddressOffset(void *address, size_t offset) {

 AbstractBasePtr PyListDtype2AbstractTensor(const py::object &shape_obj, const py::object &type_obj);

+bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple &args,
+                                       const std::shared_ptr<py::object> &ret_val);
+
 }  // namespace mindspore

 #endif  // MINDSPORE_CCSRC_UTILS_CONVERT_UTILS_H_
--- a/mindspore/ccsrc/vm/segment_runner.cc
+++ b/mindspore/ccsrc/vm/segment_runner.cc
@ -178,14 +178,12 @@ LinConvertResult Convert(const AnfNodePtrList& lst) {
 }

 LinkFuncType MsVmConvert = Convert<VM>;
-LinkFuncType GeVmConvert = Convert<GeVM>;

-std::unordered_map<std::string, LinkFuncType> backends = {{kMsVm, MsVmConvert}, {kGeVm, GeVmConvert}};
+std::unordered_map<std::string, LinkFuncType> backends = {{kMsVm, MsVmConvert}};

 std::set<std::string> backend_list = {
  kMsConvert,
  kMsVm,
-  kGeVm,
 };

 }  // namespace compile
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@ -24,7 +24,9 @@
 #include <vector>

 #include "pipeline/static_analysis/abstract_value.h"
+#ifdef ENABLE_GE
 #include "transform/convert.h"
+#endif
 #include "utils/graph_utils.h"
 #include "utils/context/ms_context.h"
 #include "debug/trace.h"
@ -55,7 +57,6 @@ CompileGraph::CompileGraph(const BackendPtr& backend, const std::vector<Primitiv
    MS_LOG(INFO) << "Attribute 'is_gevm_convert' is true";
    is_gevm_convert_ = true;
  }
-  is_graph_cut = false;
 }

 bool CompileGraph::IsCut(const AnfNodePtr& node) {
@ -80,14 +81,15 @@ bool CompileGraph::IsCut(const AnfNodePtr& node) {
      }
    }

+#ifdef ENABLE_GE
    if (is_gevm_convert_) {
-      auto name = transform::GetCNodeFuncName(cnode);
+      auto name = GetCNodeFuncName(cnode);
      auto adpt = transform::DfGraphConvertor::FindAdapter(name);
      if (adpt == nullptr) {
-        is_graph_cut = true;
+        return true;
      }
-      return true;
    }
+#endif
  }

  return false;
@ -605,12 +607,6 @@ FinalVMPtr CompileGraphs::CompileAndLink(const FuncGraphPtr& graph) {
  (void)WrapPrimitives(graph);
  Compile(graph);

-#ifdef ENABLE_GE
-  if (!transform_->IsGraphCut()) {
-    return nullptr;
-  }
-#endif
-
  FinalVMPtr rt = Link(graph);
  Reset();
  MS_LOG(DEBUG) << "End";
--- a/mindspore/ccsrc/vm/transform.h
+++ b/mindspore/ccsrc/vm/transform.h
@ -55,7 +55,6 @@ class CompileGraph {

  InstSet Run(const FuncGraphPtr& func_graph);
  InstSet GenMultiGraphsSinkInst(const FuncGraphPtr& graph);
-  bool IsGraphCut() const { return is_graph_cut; }
  bool IsCut(const AnfNodePtr& node);
  void Push(const AnfNodePtr& node);
  void Tie(const AnfNodePtr& n1, const AnfNodePtr& n2) { slots_[n2] = slots_[n1]; }
@ -101,7 +100,6 @@ class CompileGraph {
  BackendPtr backend_;
  LinkFuncType lin_convert_;
  bool is_gevm_convert_;
-  bool is_graph_cut;
  int height_{0};
  int max_height_{0};
  std::vector<PrimitivePtr> cut_list_;
--- a/mindspore/ccsrc/vm/vmimpl.cc
+++ b/mindspore/ccsrc/vm/vmimpl.cc
@ -26,8 +26,6 @@
 #include <memory>
 #include <set>

-#include "transform/graph_runner.h"
-#include "transform/convert.h"
 #include "ir/meta_tensor.h"
 #include "operator/ops.h"
 #include "ir/manager.h"
@ -40,39 +38,6 @@ namespace compile {

 using PrimitivePyPtr = std::shared_ptr<PrimitivePy>;

-static const char SEGMENT_GRAPH_NAME[] = "runnable_segment";
-
-VectorRef GeVM::RunGraph(const FuncGraphPtr& anf_graph, const VectorRef& args) {
-  // Convert graph
-  transform::DfGraphConvertor convertor(anf_graph);
-
-  (void)convertor.ConvertAllNode().BuildGraph();
-  if (convertor.ErrCode() == 0) {
-    (void)transform::DfGraphManager::GetInstance().AddGraph(SEGMENT_GRAPH_NAME, convertor.GetComputeGraph());
-  } else {
-    MS_LOG(EXCEPTION) << "convert df graph failed";
-  }
-
-  // Run graph
-  transform::GraphRunnerOptions options;
-  transform::GraphRunner graph_runner(options);
-  transform::RunOptions run_options;
-  run_options.name = SEGMENT_GRAPH_NAME;
-
-  std::vector<tensor::TensorPtr> inputs;
-  (void)std::transform(std::begin(args), std::end(args), std::back_inserter(inputs),
-                       [](const BaseRef& arg) -> tensor::TensorPtr {
-                         auto value_ref = utils::cast<PyObjectRef>(arg);
-                         auto value = value_ref.object_;
-                         return py::cast<tensor::TensorPtr>(value);
-                       });
-  std::vector<tensor::TensorPtr> outputs;
-  (void)graph_runner.RunGraph(run_options, inputs, &outputs);
-  std::vector<BaseRef> ret;
-  (void)std::copy(outputs.begin(), outputs.end(), std::back_inserter(ret));
-  return VectorRef(ret);
-}
-
 // Indicate a call to a new frame.
 struct CallWrap : public Base {
  explicit CallWrap(const VMFramePtr& vm_frame) : frame(vm_frame) {}
--- a/mindspore/ccsrc/vm/vmimpl.h
+++ b/mindspore/ccsrc/vm/vmimpl.h
@ -64,12 +64,6 @@ class VMImpl {
  virtual ~VMImpl() = default;
 };

-class GeVM : public VMImpl {
- public:
-  VectorRef RunGraph(const FuncGraphPtr& fg, const VectorRef& args) override;
-  ~GeVM() override = default;
-};
-
 // An execution frame.
 // This holds the state for an application of a graph. The nodes list
 // must contain free variables of graphs encountered before the
--- a/mindspore/common/api.py
+++ b/mindspore/common/api.py
@ -22,7 +22,7 @@ from mindspore import context
 from mindspore import log as logger
 from mindspore.parallel._utils import _get_parallel_mode
 from .._c_expression import generate_key, Executor_, Tensor, MetaTensor
-from .._c_expression import verify_inputs_signature, init_exec_dataset, export_graph, _set_dataset_mode_config, init_ge
+from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge
 from .tensor import Tensor as MsTensor

 # store ms_function class compiled pipeline cache
@ -501,6 +501,7 @@ class _Executor:
            file_name (str): File name of model to export
            file_format (str): MindSpore currently support 'GEIR' and 'ONNX' format for exported model
        """
+        from .._c_expression import export_graph
        phase = 'export' + '.' + str(net.create_time)
        export_graph(file_name, file_format, phase)

--- a/mindspore/common/parameter.py
+++ b/mindspore/common/parameter.py
@ -155,6 +155,18 @@ class Parameter:
    def data(self):
        return self.default_input

+    def __add__(self, other):
+        return self.default_input + other
+
+    def __sub__(self, other):
+        return self.default_input - other
+
+    def __mul__(self, other):
+        return self.default_input * other
+
+    def __truediv__(self, other):
+        return self.default_input / other
+
    def set_parameter_data(self, data):
        if isinstance(data, (Tensor, list, int, float,
                             np.float16, np.float32, np.int32, np.int16, np.ndarray)) and not isinstance(data, bool):
--- a/mindspore/common/tensor.py
+++ b/mindspore/common/tensor.py
@ -89,6 +89,16 @@ class Tensor(Tensor_):
        out = self.__mul__(other)
        return out

+    def __truediv__(self, other):
+        if isinstance(other, (int, float)):
+            other_tensor = Tensor(other, self.dtype())
+        elif isinstance(other, Tensor):
+            other_tensor = other
+        else:
+            raise TypeError("unsupported type for div operation")
+        out = tensor_operator_registry.get('__div__')(self, other_tensor)
+        return out
+
    def __sub__(self, other):
        if not isinstance(other, Tensor):
            raise TypeError("input_data must be a tensor")
--- a/mindspore/ops/functional.py
+++ b/mindspore/ops/functional.py
@ -125,5 +125,5 @@ shape_mul = Primitive("shape_mul")
 stop_gradient = Primitive("stop_gradient")

 tensor_operator_registry.register('__add__', tensor_add)
-
 tensor_operator_registry.register('__mul__', tensor_mul)
+tensor_operator_registry.register('__div__', tensor_div)
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@ -161,6 +161,9 @@ class Model:

    def _update_metrics(self, outputs):
        """Update metrics local values."""
+        if not isinstance(outputs, tuple):
+            raise ValueError("The `outputs` is not tuple.")
+
        if self._eval_indexes is not None and len(outputs) < 3:
            raise ValueError("The length of `outputs` must be greater than or equal to 3, \
                             but got {}".format(len(outputs)))
--- a/tests/ut/cpp/device/ascend_kernel_select_test.cc
+++ b/tests/ut/cpp/device/ascend_kernel_select_test.cc
@ -231,7 +231,7 @@ void test_select(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
  AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, kernel_node.get());
 }

-void SetParentAbstract(std::vector<AnfNodePtr> parent_list, std::vector<vector<size_t>> shapes,
+void SetParentAbstract(std::vector<AnfNodePtr> parent_list, std::vector<std::vector<size_t>> shapes,
                       std::vector<TypeId> types) {
  for (const auto &node : parent_list) {
    AnfAlgo::SetOutputInferTypeAndShape(types, shapes, node.get());
--- a/tests/ut/cpp/device/ascend_profiling_test.cc
+++ b/tests/ut/cpp/device/ascend_profiling_test.cc
@ -16,10 +16,10 @@
 #include <iostream>
 #include <memory>

+#include "./prof_reporter.h"
 #include "common/common_test.h"
 #include "device/ascend/profiling/profiling_manager.h"
 #include "./common.h"
-#include "./prof_reporter.h"
 #define private public
 #include "device/ascend/profiling/plugin_impl.h"
 #undef private
--- a/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
+++ b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
@ -20,7 +20,7 @@
 #include "ir/manager.h"
 #include "debug/anf_ir_dump.h"
 #include "session/anf_runtime_algorithm.h"
-#include "pre_activate/ascend/ir_fusion/allreduce_fusion.h"
+#include "pre_activate/common/ir_fusion/allreduce_fusion.h"
 #include "pre_activate/common/optimizer.h"
 #include "device/kernel_info.h"
 #include "pre_activate/common/pass_manager.h"
--- a/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc
+++ b/tests/ut/cpp/pre_activate/pass/convert_const_input_to_tensor_input_test.cc
@ -105,7 +105,7 @@ TEST_F(TestHWConstInputToTensorInput, test_value_tuple_tensor_input) {
  auto tensor = input1->cast<ValueNodePtr>()->value()->cast<tensor::TensorPtr>();
  ASSERT_TRUE(tensor != nullptr);
  auto data = tensor->data_c(false);
-  EXPECT_EQ(vector<int>((int *)data, (int *)data + 4), vector<int>({2, 4, 2, 2}));
+  EXPECT_EQ(std::vector<int>((int *)data, (int *)data + 4), std::vector<int>({2, 4, 2, 2}));
 }
 }  // namespace opt
 }  // namespace mindspore
--- a/tests/ut/python/ir/test_tensor.py
+++ b/tests/ut/python/ir/test_tensor.py
@ -24,6 +24,8 @@ import pytest
 import mindspore as ms
 import mindspore.common.api as me
 import mindspore.nn as nn
+from mindspore.common.parameter import Parameter
+from mindspore.common.initializer import initializer
 from ..ut_filter import non_graph_engine


@ -199,6 +201,21 @@ def test_sub():
    z = x - y
    assert isinstance(z, ms.Tensor)

+@non_graph_engine
+def test_div():
+    x = ms.Tensor(np.array([[2,6,10],[12, 4, 8]]).astype(np.float32))
+    y = ms.Tensor(np.array([[2,2,5],[6, 1, 2]]).astype(np.float32))
+    z = x / y
+    z2 = x / 2
+    assert isinstance(z, ms.Tensor)
+    assert isinstance(z2, ms.Tensor)
+
+@non_graph_engine
+def test_parameter():
+    x = Parameter(initializer(1, [1], ms.float32), name="beta1_power")
+    z = x / 2
+    print(z)
+

 class Net(nn.Cell):
    """Net definition"""
@ -378,3 +395,4 @@ def test_tensor_dtype_fp32_to_bool():
        input = np.random.randn(2, 3, 4, 5).astype(np.float32)
        input = ms.Tensor(input)
        input_me = ms.Tensor(input, dtype=ms.bool_)
+
--- a/tests/ut/python/ops/test_array_ops.py
+++ b/tests/ut/python/ops/test_array_ops.py
@ -97,20 +97,6 @@ def test_select():
    assert np.all(output.asnumpy() == expect)


-def test_scalar_cast_grad():
-    """ test_scalar_cast_grad """
-    input_x = 255.5
-    input_t = get_py_obj_dtype(ms.int8)
-
-    def fx_cast(x):
-        output = F.scalar_cast(x, input_t)
-        return output
-
-    gfn = C.grad(fx_cast)(input_x)
-    expect_dx = 1
-    assert gfn == expect_dx
-
-
 class CustomOP(PrimitiveWithInfer):
    __mindspore_signature__ = (sig_dtype.T, sig_dtype.T, sig_dtype.T1,
                               sig_dtype.T1, sig_dtype.T2, sig_dtype.T2,
--- a/tests/ut/python/parallel/init.py
+++ b/tests/ut/python/parallel/init.py
@ -13,11 +13,14 @@
 # limitations under the License.

 import mindspore.context as context
+from mindspore.parallel._utils import _reset_op_id


 def setup_module(module):
-    context.set_context(mode=context.GRAPH_MODE)
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
+    _reset_op_id()


 def teardown_module():
    context.reset_auto_parallel_context()
+    _reset_op_id()
--- a/tests/ut/python/parallel/test_alltoall.py
+++ b/tests/ut/python/parallel/test_alltoall.py
@ -97,13 +97,10 @@ def test_all_to_all():
    strategys = all_to_all_common(strategy1)
    print(strategys)
    expect_dict = {'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits'
-                   '/SoftmaxCrossEntropyWithLogits-op43': [[8, 1], [8, 1]],
-                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits'
-                   '/OneHot-op44': [[8, 1], [], []],
-                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/Transpose-op1':
-                       [[8, 1]],
-                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0':
-                       [[1, 1], [1, 8]]}
+                   '/SoftmaxCrossEntropyWithLogits-op3': [[8, 1], [8, 1]],
+                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op4': [[8, 1], [], []],
+                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/Transpose-op1': [[8, 1]],
+                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op0': [[1, 1], [1, 8]]}
    assert (strategys == expect_dict)
    context.set_context(save_graphs=False)

--- a/tests/ut/python/parallel/test_auto_parallel_arithmetic.py
+++ b/tests/ut/python/parallel/test_auto_parallel_arithmetic.py
@ -65,8 +65,8 @@ def test_auto_parallel_arithmetic():
    b = Tensor(np.ones([64, 128]), dtype=ms.float32)
    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/FloorDiv-op2': [[2, 4], [2, 4]],
-                     'Default/network-Net/MatMul-op3': [[2, 1], [1, 4]]}
+    expected_strategies = {'Default/network-Net/FloorDiv-op0': [[2, 4], [2, 4]],
+                     'Default/network-Net/MatMul-op1': [[2, 1], [1, 4]]}
    assert strategies == expected_strategies

 def test_auto_parallel_arithmetic_broadcast_both():
@ -91,8 +91,8 @@ def test_auto_parallel_arithmetic_broadcast_both():
    b = Tensor(np.ones([1, 64]), dtype=ms.float32)
    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/FloorDiv-op2': [[8, 1], [1, 1]],
-                           'Default/network-Net/MatMul-op3': [[8, 1], [1, 1]]}
+    expected_strategies = {'Default/network-Net/FloorDiv-op0': [[8, 1], [1, 1]],
+                           'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]]}
    assert strategies == expected_strategies


@ -118,8 +118,8 @@ def test_auto_parallel_arithmetic_broadcast_right():
    b = Tensor(np.ones([32]), dtype=ms.float32)
    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/FloorDiv-op2': [[4, 2], [2]],
-                           'Default/network-Net/MatMul-op3': [[4, 1], [1, 2]]}
+    expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [2]],
+                           'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]}
    assert strategies == expected_strategies


@ -145,6 +145,6 @@ def test_auto_parallel_arithmetic_broadcast_left():
    b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
    _executor.compile(net, x, y, b, phase="train")
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/FloorDiv-op2': [[4, 2], [1, 4, 2]],
-                           'Default/network-Net/MatMul-op3': [[4, 1], [1, 2]]}
+    expected_strategies = {'Default/network-Net/FloorDiv-op0': [[4, 2], [1, 4, 2]],
+                           'Default/network-Net/MatMul-op1': [[4, 1], [1, 2]]}
    assert strategies == expected_strategies
--- a/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py
+++ b/tests/ut/python/parallel/test_auto_parallel_assign_sub_with_ref_key.py
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import re
 import numpy as np
 from mindspore import context
 import mindspore.nn as nn
@ -55,6 +56,9 @@ def test_auto_parallel_assign_sub_with_ref_key():

    _executor.compile(net, x, phase="train")
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-PReLU/PReLU-op2': [[1, 1, 1, 8], [1]],
-                           'Default/network-PReLU/ReLU-op3': [[1]]}
-    assert strategies == expected_strategies
+    for (k, v) in strategies.items():
+        if re.search('PReLU-op', k) is not None:
+            assert v == [[1, 1, 1, 8], [1]]
+        elif re.search('ReLU-op', k) is not None:
+            assert v == [[1]]
+
--- a/tests/ut/python/parallel/test_auto_parallel_cast.py
+++ b/tests/ut/python/parallel/test_auto_parallel_cast.py
@ -75,9 +75,9 @@ def test_double_star_graph():

    _executor.compile(net, x, y, z, w, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/MatMul-op0': [[1, 8], [8, 1]],
-                           'Default/network-Net/Cast-op7': [[8, 1]],
-                           'Default/network-Net/MatMul-op8': [[8, 1], [1, 1]],
-                           'Default/network-Net/Cast-op9': [[1, 8]],
-                           'Default/network-Net/MatMul-op10': [[1, 1], [1, 8]]}
+    expected_strategies = {'Default/network-Net/Cast-op1': [[8, 1]],
+                           'Default/network-Net/Cast-op3': [[1, 8]],
+                           'Default/network-Net/MatMul-op2': [[8, 1], [1, 1]],
+                           'Default/network-Net/MatMul-op4': [[1, 1], [1, 8]],
+                           'Default/network-Net/MatMul-op0': [[1, 8], [8, 1]]}
    assert strategies == expected_strategies
--- a/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py
+++ b/tests/ut/python/parallel/test_auto_parallel_matmul_prelu.py
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import re
 import numpy as np
 from mindspore import context
 import mindspore.nn as nn
@ -66,7 +67,10 @@ def test_matmul_prelu():

    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
-    assert strategies['Default/network-Net/PReLU-op2'] == [[16, 1, 1, 1], [1]]
-    assert strategies['Default/network-Net/Mul-op3'] == [[16, 1, 1, 1], [16, 1, 1, 1]]
+    for (k, v) in strategies.items():
+        if re.search('PReLU-op', k) is not None:
+            assert v == [[16, 1, 1, 1], [1]]
+        elif re.search('Mul-op', k) is not None:
+            assert v == [[16, 1, 1, 1], [16, 1, 1, 1]]


--- a/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py
+++ b/tests/ut/python/parallel/test_auto_parallel_parameter_cast.py
@ -80,9 +80,9 @@ def test_common_parameter():

    _executor.compile(net, x, y, z, w, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/MatMul-op6': [[8, 1], [1, 1]],
-                           'Default/network-Net/MatMul-op8': [[8, 1], [1, 1]],
-                           'Default/network-Net/Cast-op7': [[1, 1]],
+    expected_strategies = {'Default/network-Net/MatMul-op1': [[8, 1], [1, 1]],
+                           'Default/network-Net/MatMul-op3': [[8, 1], [1, 1]],
+                           'Default/network-Net/Cast-op2': [[1, 1]],
                           'Default/network-Net/MatMul-op0': [[8, 1], [1, 1]],
-                           'Default/network-Net/Cast-op9': [[1, 1]]}
+                           'Default/network-Net/Cast-op4': [[1, 1]]}
    assert strategies == expected_strategies
--- a/tests/ut/python/parallel/test_auto_parallel_transpose.py
+++ b/tests/ut/python/parallel/test_auto_parallel_transpose.py
@ -71,8 +71,8 @@ def test_two_matmul_transpose():

    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/Transpose-op4': [[1, 16]],
-                           'Default/network-Net/Transpose-op5': [[16, 1]],
-                           'Default/network-Net/MatMul-op6': [[16, 1], [1, 1]],
-                           'Default/network-Net/MatMul-op7': [[16, 1], [1, 1]]}
+    expected_strategies = {'Default/network-Net/Transpose-op0': [[1, 16]],
+                           'Default/network-Net/Transpose-op1': [[16, 1]],
+                           'Default/network-Net/MatMul-op2': [[16, 1], [1, 1]],
+                           'Default/network-Net/MatMul-op3': [[16, 1], [1, 1]]}
    assert strategies == expected_strategies
--- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
+++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
@ -135,7 +135,6 @@ def test_two_matmul():
    
    _executor.compile(net, x, y, b, phase='train')
    strategies = _executor._get_strategy(net)
-    expected_strategies = {'Default/network-Net/MatMul-op2': [[16, 1], [1, 1]],
-                     'Default/network-Net/MatMul-op3': [[16, 1], [1, 1]]}
+    expected_strategies = {'Default/network-Net/MatMul-op0': [[16, 1], [1, 1]],
+                     'Default/network-Net/MatMul-op1': [[16, 1], [1, 1]]}
    assert strategies == expected_strategies
-
--- a/tests/ut/python/parallel/test_dataset_interface.py
+++ b/tests/ut/python/parallel/test_dataset_interface.py
@ -84,7 +84,7 @@ def loss_scale_manager_common(strategy1):
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    scale_manager = DynamicLossScaleManager(32, 2, 2000)
    model = Model(net, loss, opt, loss_scale_manager=scale_manager)
-    # if no GE exists, outputs = self._train_network(*next_element) outputs is None, TypeError is caught.
+    # if no GE exists, outputs = self._train_network(*next_element) outputs inputs tensor.
    try:
        model.train(epoch_size, dataset, dataset_sink_mode=False)
    except TypeError:
--- a/tests/ut/python/parallel/test_one_dev.py
+++ b/tests/ut/python/parallel/test_one_dev.py
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import re
 from mindspore.train import Model, ParallelMode
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim.momentum import Momentum
@ -89,16 +90,13 @@ def all_to_all_common():


 def test_one_dev():
-
    _reset_op_id()
-    strategys = all_to_all_common()
-    expect_dict = {'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits'
-                   '/SoftmaxCrossEntropyWithLogits-op9': [[1, 1], [1, 1]],
-                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits'
-                   '/OneHot-op10': [[1, 1], [], []],
-                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/Transpose-op11':
-                       [[1, 1]],
-                   'Default/network-_VirtualDatasetCell/_backbone-WithLossCell/_backbone-AllToAllNet/MatMul-op12':
-                       [[1, 1], [1, 1]]}
-    assert (strategys == expect_dict)
+    strategies = all_to_all_common()
+    for (k, v) in strategies.items():
+        if re.search('SoftmaxCrossEntropyWithLogits-op', k) is not None:
+            assert v == [[1, 1], [1, 1]]
+        elif re.search('Transpose-op', k) is not None:
+            assert v == [[1, 1]]
+        elif re.search('MatMul-op', k) is not None:
+            assert v == [[1, 1], [1, 1]]

--- a/tests/ut/python/pipeline/parse/test_create_obj.py
+++ b/tests/ut/python/pipeline/parse/test_create_obj.py
@ -24,6 +24,7 @@
 import logging
 import numpy as np
 import mindspore.nn as nn
+from mindspore import context
 from mindspore.ops import operations as P
 from mindspore.common.api import ms_function
 from mindspore.common.tensor import Tensor
@ -50,6 +51,7 @@ class Net(nn.Cell):
 def test_create_cell_object_on_construct():
    """ test_create_cell_object_on_construct """
    log.debug("begin test_create_object_on_construct")
+    context.set_context(mode=context.GRAPH_MODE)
    np1 = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me = Tensor(np1)

@ -118,6 +120,7 @@ class NetC(nn.Cell):
 def test_create_cell_object_on_construct_use_many_parameter():
    """ test_create_cell_object_on_construct_use_many_parameter """
    log.debug("begin test_create_object_on_construct")
+    context.set_context(mode=context.GRAPH_MODE)
    np1 = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me = Tensor(np1)

--- a/tests/ut/python/pipeline/parse/test_dtype.py
+++ b/tests/ut/python/pipeline/parse/test_dtype.py
@ -28,5 +28,4 @@ def try_type():


 def test_dtype_convert():
-    with pytest.raises(RuntimeError):
-        try_type()
+    try_type()
--- a/tests/ut/python/pynative_mode/ops/test_grad.py
+++ b/tests/ut/python/pynative_mode/ops/test_grad.py
@ -19,8 +19,10 @@ from mindspore.common.api import ms_function
 from mindspore import Tensor
 from mindspore.ops import composite as C
 from mindspore.ops.composite import grad_all_with_sens
+from mindspore.common.dtype import get_py_obj_dtype
 import mindspore.nn as nn
 import mindspore.ops.operations as P
+from mindspore.ops import functional as F
 from ...ut_filter import non_graph_engine


@ -78,6 +80,20 @@ def test_cast_grad():
    assert np.all(gout[0].asnumpy() == expect)


+def test_scalar_cast_grad():
+    """ test_scalar_cast_grad """
+    input_x = 255.5
+    input_t = get_py_obj_dtype(ms.int8)
+
+    def fx_cast(x):
+        output = F.scalar_cast(x, input_t)
+        return output
+
+    gfn = C.grad(fx_cast)(input_x)
+    expect_dx = 1
+    assert gfn == expect_dx
+
+
@non_graph_engine
 def test_reshape_grad():
    """ test_reshape_grad """
--- a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py
+++ b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py
@ -163,12 +163,7 @@ def test_scalar_summary_use_invalid_tag_None():
 def test_scalar_summary_use_invalid_tag_Bool():
    log.debug("begin test_scalar_summary_use_invalid_tag_Bool")
    net = SummaryDemoTag(True, True, True)
-    try:
-        run_case(net)
-    except:
-        assert True
-    else:
-        assert False
+    run_case(net)
    log.debug("finished test_scalar_summary_use_invalid_tag_Bool")


@ -176,12 +171,7 @@ def test_scalar_summary_use_invalid_tag_Bool():
 def test_scalar_summary_use_invalid_tag_null():
    log.debug("begin test_scalar_summary_use_invalid_tag_null")
    net = SummaryDemoTag("", "", "")
-    try:
-        run_case(net)
-    except:
-        assert True
-    else:
-        assert False
+    run_case(net)
    log.debug("finished test_scalar_summary_use_invalid_tag_null")


@ -189,12 +179,7 @@ def test_scalar_summary_use_invalid_tag_null():
 def test_scalar_summary_use_invalid_tag_Int():
    log.debug("begin test_scalar_summary_use_invalid_tag_Int")
    net = SummaryDemoTag(1, 2, 3)
-    try:
-        run_case(net)
-    except:
-        assert True
-    else:
-        assert False
+    run_case(net)
    log.debug("finished test_scalar_summary_use_invalid_tag_Int")


--- a/tests/ut/python/utils/test_serialize.py
+++ b/tests/ut/python/utils/test_serialize.py
@ -30,7 +30,7 @@ from mindspore.nn import WithLossCell, TrainOneStepCell
 from mindspore.train.callback import _CheckpointManager
 from mindspore.train.serialization import save_checkpoint, load_checkpoint,load_param_into_net, \
                                          _exec_save_checkpoint, export, _save_graph
-from ..ut_filter import run_on_onnxruntime
+from ..ut_filter import run_on_onnxruntime, non_graph_engine
 from mindspore import context


@ -306,6 +306,7 @@ class MYNET(nn.Cell):
        return out


+@non_graph_engine
 def test_export():
    net = MYNET()
    input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]).astype(np.float32))